Ejemplo n.º 1
0
def main(generator=False):
    """Download the current BioGRID release and flatten it to a text file.

    Everything happens inside the module-level ``path`` directory:

    1. Scrape the BioGRID front page for the current release version.
    2. Fetch the matching tab2 and mitab archives through ``Folder``.
    3. Write one tab-separated row per interaction to ``interactions.txt``.
    4. Append the version and the current timestamp to ``version.txt``.
    5. Delete every file in ``path`` whose name contains ``BIOGRID``.

    Args:
        generator: Accepted for interface compatibility; not used here.

    Raises:
        RuntimeError: If no release version can be found on the front page.
        IndexError: If the parsed tab2 file is empty, or a row has fewer
            columns than expected.
        ValueError: If a numeric column cannot be converted with ``int``.
    """
    os.chdir(path)

    # Version:
    url = 'http://thebiogrid.org/'
    f = urllib.urlopen(url)
    try:
        contents = f.read()
    finally:
        # Release the connection even if the read fails.
        f.close()
    for line in contents.split('\n'):
        if '<div class="newspost-title">BioGRID Version' in line:
            print(line)
            VERSION = line.split('<div class="newspost-title">BioGRID Version ')[1].split(' Release ')[0]
            break
    else:
        # Without this the code below would die on an unbound VERSION.
        raise RuntimeError('Could not find the BioGRID version on %s' % url)
    print(VERSION)

    # Urls:
    url = 'http://thebiogrid.org/downloads/archives/Release%20Archive/BIOGRID-'
    tab2url = url+VERSION+'/BIOGRID-ALL-'+VERSION+'.tab2.zip'
    mitaburl = url+VERSION+'/BIOGRID-ALL-'+VERSION+'.mitab.zip'

    # Files:
    folder = Folder()
    folder.get([tab2url, mitaburl])

    # NOTE(review): parse() is assumed to return a list of text lines --
    # confirm against the Folder implementation.
    tab2 = folder.contains('tab2')[0].parse(printing=False, seperator=None)
    mitab = folder.contains('mitab')[0].parse(printing=False, seperator=None)

    # Parsing:
    if not tab2:
        # Fail before interactions.txt is truncated by the open() below.
        raise IndexError('BioGRID tab2 file is empty')

    # NOTE(review): D is filled in but never read or returned by this function.
    D = {}
    with open(os.path.join(path, 'interactions.txt'), 'w') as output:
        for x, line in enumerate(tab2):
            # Skip the header row and blank lines.
            if "#BioGRID Interaction ID" in line or line == "":
                continue

            columns = line.split('\t')
            interaction_id = int(columns[0])
            D.setdefault(interaction_id, {})

            systematic_name_intactor_a = columns[5]
            systematic_name_intactor_b = columns[6]
            official_gene_symbol_a = columns[7]
            official_gene_symbol_b = columns[8]
            synonymns_interactor_a = columns[9].split('|')
            synonymns_interactor_b = columns[10].split('|')

            # Each alias list starts with columns[1] / columns[2] and then
            # collects every other name once, skipping the '-' placeholder.
            AliasA, AliasB = [columns[1]], [columns[2]]
            for name in [systematic_name_intactor_a, official_gene_symbol_a] + synonymns_interactor_a:
                if name != '-' and name not in AliasA:
                    AliasA.append(name)
            for name in [systematic_name_intactor_b, official_gene_symbol_b] + synonymns_interactor_b:
                if name != '-' and name not in AliasB:
                    AliasB.append(name)

            experimental_system_type = [columns[12], 'direct']
            experimental_system = columns[11]

            # The text between the first '(' and ')' of mitab column 11 on
            # the same line index; assumes both files are line-aligned.
            interaction_type = mitab[x].split('\t')[11].split('(')[1].split(')')[0]

            pmid = int(columns[14])
            # NOTE(review): 559292 -> 4932 presumably folds a strain-level
            # taxonomy id into its species-level id -- confirm.
            taxid_a = int(columns[15])
            if taxid_a == 559292:
                taxid_a = 4932
            taxid_b = int(columns[16])
            if taxid_b == 559292:
                taxid_b = 4932
            throughput = columns[17].split('|')
            modification = columns[19] if columns[19] != '-' else ''
            source = columns[23]   #'BioGRID'

            r = '\t'.join(['; '.join(AliasA), '; '.join(AliasB), '; '.join(experimental_system_type), experimental_system, interaction_type,  modification, str(taxid_a), str(taxid_b), str(pmid), source+'\n'])
            output.write(r)

            D[interaction_id][int(columns[1])] = {'source':source,
                                                  'experimental_system':experimental_system,
                                                  'experimental_system_type':experimental_system_type,
                                                  'taxid_a':taxid_a,
                                                  'taxid_b':taxid_b,
                                                  'pmid':pmid,
                                                  'throughput':throughput}

    new_entry = '\n%s %s' % (VERSION, datetime.datetime.now())
    with open(os.path.join(path, 'version.txt'), 'a') as version_file:
        version_file.write(new_entry)

    #Cleaning up:
    for filename in os.listdir(path):
        if "BIOGRID" in filename:
            print("Deleting %s" % filename)
            # Join with path so deletion does not depend on the working directory.
            os.remove(os.path.join(path, filename))
Ejemplo n.º 2
0
def _merge_aliases(primary, candidates):
    """Return ``[primary]`` plus each new candidate, skipping '-' and repeats."""
    aliases = [primary]
    for name in candidates:
        if name != '-' and name not in aliases:
            aliases.append(name)
    return aliases


def main(generator=False):
    """Fetch the current BioGRID release and export its interactions.

    Works inside the module-level ``path`` directory: reads the release
    version from the BioGRID front page, downloads the tab2 and mitab
    archives through ``Folder``, writes one tab-separated row per
    interaction to ``interactions.txt``, appends the version and a
    timestamp to ``version.txt``, and finally deletes every file in
    ``path`` whose name contains ``BIOGRID``.

    Args:
        generator: Accepted for interface compatibility; not used here.

    Raises:
        RuntimeError: If no release version can be found on the front page.
        IndexError: If the parsed tab2 file is empty, or a row has fewer
            columns than expected.
        ValueError: If a numeric column cannot be converted with ``int``.
    """
    os.chdir(path)

    # Version:
    url = 'http://thebiogrid.org/'
    f = urllib.urlopen(url)
    try:
        contents = f.read()
    finally:
        # Close the handle even when the read raises.
        f.close()

    marker = '<div class="newspost-title">BioGRID Version'
    for line in contents.split('\n'):
        if marker in line:
            print(line)
            VERSION = line.split(marker + ' ')[1].split(' Release ')[0]
            break
    else:
        # Previously this fell through to an unbound VERSION.
        raise RuntimeError('Could not find the BioGRID version on %s' % url)
    print(VERSION)

    # Urls:
    url = 'http://thebiogrid.org/downloads/archives/Release%20Archive/BIOGRID-'
    tab2url = url + VERSION + '/BIOGRID-ALL-' + VERSION + '.tab2.zip'
    mitaburl = url + VERSION + '/BIOGRID-ALL-' + VERSION + '.mitab.zip'

    # Files:
    folder = Folder()
    folder.get([tab2url, mitaburl])

    # NOTE(review): parse() is assumed to return a list of text lines --
    # confirm against the Folder implementation.
    tab2 = folder.contains('tab2')[0].parse(printing=False, seperator=None)
    mitab = folder.contains('mitab')[0].parse(printing=False, seperator=None)

    # Parsing:
    if not tab2:
        # Fail before interactions.txt is truncated by the open() below.
        raise IndexError('BioGRID tab2 file is empty')

    # NOTE(review): D is filled in but never read or returned by this function.
    D = {}
    with open(os.path.join(path, 'interactions.txt'), 'w') as output:
        for x, line in enumerate(tab2):
            # Skip the header row and blank lines.
            if "#BioGRID Interaction ID" in line or line == "":
                continue

            columns = line.split('\t')
            interaction_id = int(columns[0])
            if interaction_id not in D:
                D[interaction_id] = {}

            # Order matters: systematic name, official symbol, then synonyms.
            AliasA = _merge_aliases(
                columns[1], [columns[5], columns[7]] + columns[9].split('|'))
            AliasB = _merge_aliases(
                columns[2], [columns[6], columns[8]] + columns[10].split('|'))

            experimental_system_type = [columns[12], 'direct']
            experimental_system = columns[11]

            # The text between the first '(' and ')' of mitab column 11 on
            # the same line index; assumes both files are line-aligned.
            mitab_field = mitab[x].split('\t')[11]
            interaction_type = mitab_field.split('(')[1].split(')')[0]

            pmid = int(columns[14])
            # NOTE(review): 559292 -> 4932 presumably folds a strain-level
            # taxonomy id into its species-level id -- confirm.
            taxid_a = int(columns[15])
            if taxid_a == 559292:
                taxid_a = 4932
            taxid_b = int(columns[16])
            if taxid_b == 559292:
                taxid_b = 4932
            throughput = columns[17].split('|')
            if columns[19] != '-':
                modification = columns[19]
            else:
                modification = ''
            source = columns[23]  #'BioGRID'

            r = '\t'.join([
                '; '.join(AliasA), '; '.join(AliasB),
                '; '.join(experimental_system_type), experimental_system,
                interaction_type,
                modification,
                str(taxid_a),
                str(taxid_b),
                str(pmid), source + '\n'
            ])
            output.write(r)

            D[interaction_id][int(columns[1])] = {
                'source': source,
                'experimental_system': experimental_system,
                'experimental_system_type': experimental_system_type,
                'taxid_a': taxid_a,
                'taxid_b': taxid_b,
                'pmid': pmid,
                'throughput': throughput
            }

    new_entry = '\n%s %s' % (VERSION, datetime.datetime.now())
    with open(os.path.join(path, 'version.txt'), 'a') as version_file:
        version_file.write(new_entry)

    #Cleaning up:
    for filename in os.listdir(path):
        if "BIOGRID" in filename:
            print("Deleting %s" % filename)
            # Join with path so deletion does not depend on the working directory.
            os.remove(os.path.join(path, filename))