Beispiel #1
0
def getDataPackage(data_dir="/home/earls3/Price/AUREAPackage/src/AUREA/data/"):
    """
    Builds and returns a dataPackager for the GDS2545 SOFT data set.

    data_dir - directory holding GDS2545.soft.gz,
               c2.biocarta.v2.5.symbols.gmt and Homo_sapiens.gene_info.gz.
               Defaults to the location that used to be hard-coded here.

    Column headings whose text contains 'normal prostate tissue free' are
    collected as normal samples; of the remaining headings, those whose
    text contains 'tumor' are collected as tumor samples.  All tumor
    samples go to the "Tumor" classification, all normal samples except
    the last go to "Normal", and the last normal sample is handed to
    setUnclassified instead.

    Raises IndexError if no normal sample is found.
    """
    # Local import: the file's top-level import block is not visible here.
    import os

    softfile = os.path.join(data_dir, "GDS2545.soft.gz")
    gnfile = os.path.join(data_dir, "c2.biocarta.v2.5.symbols.gmt")
    synfile = os.path.join(data_dir, "Homo_sapiens.gene_info.gz")

    gnf = GMTParser.GMTParser(gnfile)
    sp = SOFTParser.SOFTParser(softfile)

    normal = []
    tumor = []
    # presumably each entry is (sample id, description) - verify against
    # SOFTParser
    for line in sp.column_heading_info[0]:
        description = line[1]
        # Use `in` rather than `string.find(...) > 0`: find() returns 0 for
        # a match at the very start of the text, which the old comparison
        # treated as "not found".  `in` also works on Python 3, where
        # string.find no longer exists.
        if 'normal prostate tissue free' in description:
            normal.append(line[0].strip())
        elif 'tumor' in description:
            tumor.append(line[0].strip())

    dt = DataCleaner.DataTable()
    dt.getSOFTData(sp)

    dp = DataPackager.dataPackager()
    dp.addGeneNetwork(gnf.getAllNetworks())
    dp.addDataTable(dt)
    dp.addSynonyms(synfile)
    dp.createClassification("Tumor")
    dp.createClassification("Normal")
    for samp in tumor:
        dp.addToClassification("Tumor", dt.dt_id, samp)

    # The last normal sample is deliberately left out of "Normal" and
    # registered as unclassified instead.
    for samp in normal[:-1]:
        dp.addToClassification("Normal", dt.dt_id, samp)
    dp.setUnclassified(dt.dt_id, normal[-1])
    return dp
Beispiel #2
0
def buildData(file1, file2, config):
    """
    Build a data package from two csv files.

    file1, file2 - names of the csv files to load
    config - settings object; its "datatable" section supplies the gene
             collision rule, the bad data value and the gene/probe column
             names

    Returns a dataPackager containing one data table per file.  Every
    heading returned by getDataColumnHeadings() for file1 is added to
    classification "f1", and likewise for file2 and classification "f2".
    """
    network_path = "c2.biocarta.v2.5.symbols.gmt"
    synonym_path = "Homo_sapiens.gene_info.gz"

    def datatable_setting(name):
        # Each setting is taken as element 0 of what getSetting returns.
        return config.getSetting("datatable", name)[0]

    collision = datatable_setting("Gene Collision Rule")
    bad_data = datatable_setting("Bad Data Value")
    gene_column = datatable_setting("Gene Column")
    probe_column = datatable_setting("Probe Column")

    networks = GMTParser.GMTParser(network_path)

    # Parse both csv files first, then build one data table per parser.
    parsers = [
        CSVParser.CSVParser(
            csv_name,
            probe_column_name=probe_column,
            gene_column_name=gene_column,
        )
        for csv_name in (file1, file2)
    ]
    tables = []
    for parser in parsers:
        table = DataCleaner.DataTable(probe_column, gene_column, collision, bad_data)
        table.getCSVData(parser)
        tables.append(table)

    package = DataPackager.dataPackager(merge_cache=".")
    package.addGeneNetwork(networks.getAllNetworks())
    package.addSynonyms(synonym_path)
    for table in tables:
        package.addDataTable(table)

    # One classification per input file, filled with that file's columns.
    for label, parser, table in zip(("f1", "f2"), parsers, tables):
        package.createClassification(label)
        for heading in parser.getDataColumnHeadings():
            package.addToClassification(label, table.dt_id, heading)
    return package