import GEOparse import numpy as np import pandas as pd import matplotlib.pyplot as plt probes_conv = GEOparse.parse_GSM( \ "/home/cyril/Documents/Master/sem_1/Case_study/module1/data/GPL6457_old_annotations.txt.gz") gse = GEOparse.get_GEO("GSE24616", destdir="./") # gse = GEOparse.get_GEO(filepath="./GSM606890.TXT.GZ.soft") char = {"stage": [], "time": [], "sex": [], "sample_name": []} for gsm_name, gsm in sorted(gse.gsms.iteritems()): char["stage"].append(gsm.metadata['characteristics_ch1'][1].split(": ")[1]) char["time"].append(gsm.metadata['characteristics_ch1'][2].split(": ")[1]) char["sex"].append(gsm.metadata['characteristics_ch1'][3].split(": ")[1]) char["sample_name"].append(gsm.name) print(char["stage"][3], char["time"][3], char["sex"][3], char["sample_name"][3]) GPL = gse.gpls.values()[0] pivoted_samples = gse.pivot_samples('VALUE') pivoted_samples.set_index(GPL.table.SPOT_ID, inplace=True) # pivoted_samples.hist() strata = pd.read_csv("../phylostrata.txt", sep="\t", header=None) strata.columns = ["GeneID", "ProbeID", "age"] strata.set_index("ProbeID", inplace=True)