Example #1
0
def createDataPackage(dataFile, subset1, subset2 ):
    """
    Assemble a dataPackager from a SOFT file, with one classification per
    requested subset.

    dataFile - handed unchanged to SOFTParser.
    subset1, subset2 - labels tested with ``in`` against each subset's
        'subset_description' attribute; the same labels are reused as the
        classification names.

    Returns the populated dataPackager.

    Notes:
      * If several subsets match a label, the last one iterated is used.
      * If no subset matches a label, None is passed to getSubsetSamples;
        what happens then depends on SOFTParser (not visible here).
      * AUREA_dir is read from the enclosing module scope; it is not
        defined in this function.
    """
    from AUREA.packager.DataPackager import dataPackager
    from AUREA.parser.SOFTParser import SOFTParser
    from AUREA.packager.DataCleaner import DataTable
    from AUREA.parser.GMTParser import GMTParser

    # Parse the SOFT file and find the subset object for each label.
    parser = SOFTParser(dataFile)
    first_match = None
    second_match = None
    for candidate in parser.getSubsets():
        description = candidate.attributes['subset_description']
        if subset1 in description:
            first_match = candidate
        if subset2 in description:
            second_match = candidate
    first_samples = parser.getSubsetSamples(first_match)
    second_samples = parser.getSubsetSamples(second_match)

    # Load the parsed SOFT data into a table.
    table = DataTable()
    table.getSOFTData(parser)

    # Supporting files used for gene networks and gene synonyms.
    network_path = AUREA_dir +"/workspace/data/c2.biocarta.v2.5.symbols.gmt"
    synonym_path = AUREA_dir + "/workspace/data/Homo_sapiens.gene_info.gz"

    # Wire everything into the packager.
    package = dataPackager()
    package.addSynonyms(synonym_path)
    networks = GMTParser(network_path)
    package.addGeneNetwork(networks.getAllNetworks())
    package.addDataTable(table)

    # Both classifications are created before any sample is assigned.
    package.createClassification(subset1)
    package.createClassification(subset2)
    assignments = ((subset1, first_samples), (subset2, second_samples))
    for label, members in assignments:
        for member in members:
            package.addToClassification(label, table.dt_id, member)
    return package
Example #2
0
from AUREA.parser.CSVParser import CSVParser
from AUREA.packager.DataCleaner import DataTable
from AUREA.packager.DataPackager import dataPackager
# Smoke-test script: parse a CSV file, print what the parser reports, then
# feed it through a DataTable and a dataPackager and write it back out as CSV.
csvFile = "data/testPackagerProbes1.csv"
# The "probe" column is given as both the probe and the gene column name.
p = CSVParser(csvFile,probe_column_name="probe", gene_column_name="probe")

# Parenthesised single-argument print: parses on Python 3 and prints the
# same text on Python 2, unlike the bare print statement.
print(p.getColumnHeadings())
print(p.getColumnHeadingsInfo())
#print(p.getTable())
print(p.getID_REF())
print(p.getIDENTIFIER())

# Build a table from the parsed CSV, using the same column for probe and gene.
dt = DataTable(probe_column="probe", gene_column="probe")
dt.getCSVData(p)

# Package the single table and write it out under a new file name.
dp = dataPackager()
dp.addDataTable(dt)
dp.writeToCSV("copyofacopy.csv")





Example #3
0
from AUREA.packager.DataPackager import dataPackager
if __name__ == "__main__":
    # Manual smoke test: take the samples of two subsets from one SOFT file
    # and have the packager write each group to its own CSV file.

    # SOFTParser and DataTable are used below, but the only import that
    # accompanies this block is dataPackager; import them here so the script
    # does not fail with NameError.
    from AUREA.parser.SOFTParser import SOFTParser
    from AUREA.packager.DataCleaner import DataTable

    # NOTE(review): hard-coded, machine-specific data directory.
    path = "/home/earls3/Price/AUREA/workspace/data/"
    f1 = "GDS2545.soft.gz"
    sp = SOFTParser(path + f1)

    # Sample lists stay empty if no subset carries the expected description.
    t1s = []
    t2s = []
    for x in sp.getSubsets():
        # Only the first entry of 'subset_description' is compared.
        if x.attributes['subset_description'][0] == 'normal prostate tissue':
            t1s = sp.getSubsetSamples(x)
        if x.attributes['subset_description'][0] == 'primary prostate tumor':
            t2s = sp.getSubsetSamples(x)

    dt = DataTable()
    dt.getSOFTData(sp)
    dp = dataPackager(merge_cache=".")
    dp.addDataTable(dt)

    # First pass: the normal samples. The "ignore" classification is created
    # but never populated -- presumably the packager expects two
    # classifications; TODO confirm against dataPackager.
    dp.createClassification("Normal")
    dp.createClassification("ignore")
    for samp in t1s:
        dp.addToClassification("Normal", dt.dt_id, samp)
    dp.writeToCSV("normal.csv", key='probe')

    # Second pass: clear the classifications and repeat for the tumor samples.
    dp.clearClassification()
    dp.createClassification("Tumor")
    dp.createClassification("ignore")
    for samp in t2s:
        dp.addToClassification("Tumor", dt.dt_id, samp)
    dp.writeToCSV("tumor.csv", key='probe')