Example #1
0
def Arch2SQL(database, sqlfile, verbose):
    """Dump the ``.archObj`` files found under *database* as gzipped SQL.

    A ``0000.sql.gz`` file holding the 'PDBarch' source record is written to
    ``<sqlfile>/00``.  Then the ``.archObj`` files in the ``archobj`` and
    ``superobj`` sub-directories are grouped by an identifier built from
    their file name, and one ``<identifier>.sql.gz`` file is written per
    group.

    :param database: directory containing the ``archobj`` and ``superobj``
        sub-directories.
    :param sqlfile: output location; it is used as a *directory* in which
        the per-identifier sub-directories are created.
    :param verbose: when true, progress messages are written to stderr.
    """
    from itertools import groupby

    if verbose:
        sys.stderr.write("Retrieving data from {0} ...\n".format(database))
    sqlroot = os.path.abspath(sqlfile)

    newsource = Source(name='PDBarch', source="http://www-pdb.org/")
    outdir = os.path.join(sqlroot, '00')
    Path.mkdir(outdir)
    with gzip.open(os.path.join(outdir, '0000.sql.gz'), 'wb') as sql_fd:
        sql_fd.write(start_transaction())
        sql_fd.write(newsource.toSQL())
        sql_fd.write(end_transaction())

    # Key every archObj file by the '_'-separated fields of its base name,
    # dropping the first two fields.
    files_list_by_pdb = {}
    for subdir in ('archobj', 'superobj'):
        for archobjfile in Path.list_files(os.path.join(database, subdir)):
            if archobjfile.endswith('.archObj'):
                basename = os.path.splitext(os.path.split(archobjfile)[-1])[0]
                data = tuple(basename.split('_')[2:])
                files_list_by_pdb[data] = archobjfile

    # Sorted keys sharing their first two fields are adjacent, so groupby
    # yields exactly one group per output file.  When no file was found the
    # loop body never runs and nothing else is written.
    by_identifier = groupby(sorted(files_list_by_pdb),
                            key=lambda fields: fields[0] + '_' + fields[1])
    for pdb, members in by_identifier:
        members = list(members)
        outdir = os.path.join(sqlroot, members[0][0][1:3].lower())
        Path.mkdir(outdir)
        if verbose:
            sys.stderr.write("Retrieving loops from {0} ...\n".format(pdb))
        with gzip.open(os.path.join(outdir, pdb + '.sql.gz'), 'wb') as sql_fd:
            sql_fd.write(start_transaction())
            if verbose:
                sys.stderr.write("Printing data from {0} ...\n".format(pdb))
            newArchSet = Arch(pdb)
            for dofdata in members:
                # NOTE(review): presumably the ``archs`` setter accumulates
                # the loaded data rather than replacing it -- confirm in Arch.
                newArchSet.archs = SSpair.load(files_list_by_pdb[dofdata])
            sql_fd.write(newArchSet.toSQL())
            sql_fd.write(end_transaction())

    if verbose:
        sys.stderr.write("End execution.\n")
Example #2
0
def TaxID2SQL(database, sqlfile, skip_download, verbose):
    """Write the local TaxID database as a single gzipped SQL file.

    :param database: local directory handed to ``TaxIDlink``.
    :param sqlfile: path of the gzipped SQL file to create; its parent
        directory is created if needed.
    :param skip_download: when false, the database is downloaded first and a
        'taxid' source record is written at the top of the SQL file.
    :param verbose: when true, progress messages are written to stderr.
    """
    taxid_connect = TaxIDlink(local=database)
    newsource = None
    if not skip_download:
        if verbose:
            sys.stderr.write(
                "Downloading TaxID database to {0} ...\n".format(database))
        taxid_connect.download()
        newsource = Source(name='taxid', source=taxid_connect.source)
        if verbose:
            sys.stderr.write("Download Finished.\n")
    elif verbose:
        sys.stderr.write("Using previously downloaded database.\n")

    if verbose:
        sys.stderr.write("Parsing TaxID.\n")
        sys.stderr.write("Writing {0} ....\n".format(sqlfile))
    Path.mkdir(os.path.split(os.path.abspath(sqlfile))[0])

    # Records flagged ``has_new`` are held back and emitted after all the
    # others.
    has_new = []
    # The context manager guarantees the gzip stream is closed even when a
    # record fails to parse.
    with gzip.open(sqlfile, 'wb') as sql_fd:
        sql_fd.write(start_transaction())
        if newsource is not None:
            sql_fd.write(newsource.toSQL())

        for tax_line in taxid_connect.localTaxIDs:
            newtax = TaxID(inline=tax_line)
            if newtax.has_new:
                has_new.append(newtax.toSQL())
            else:
                sql_fd.write(newtax.toSQL() + "\n")

        sql_fd.write("\n".join(has_new) + "\n")
        sql_fd.write(end_transaction())
    if verbose:
        sys.stderr.write("End execution.\n")
def DrugBank2SQL(database, sqlfile, skip_download, verbose):
    """Write the local DrugBank database as a single gzipped SQL file.

    :param database: local directory handed to ``DrugBanklink``.
    :param sqlfile: path of the gzipped SQL file to create; its parent
        directory is created if needed.
    :param skip_download: when false, a 'DrugBank' source record is written
        at the top of the SQL file (see the NOTE below about the download).
    :param verbose: when true, progress messages are written to stderr.
    """
    drugbank_connect = DrugBanklink(local=database)
    newsource = None
    if not skip_download:
        if verbose:
            sys.stderr.write(
                "Downloading drugBank database to {0} ...\n".format(database))
        # NOTE: the actual download call is disabled here; only the source
        # record is registered.
        # drugbank_connect.download()
        newsource = Source(name='DrugBank', source=drugbank_connect.source)
        if verbose:
            sys.stderr.write("Download Finished.\n")
    elif verbose:
        sys.stderr.write("Using previously downloaded database.\n")

    if verbose:
        sys.stderr.write("Parsing drugBank.\n")
        sys.stderr.write("Writing {0} ....\n".format(sqlfile))
    Path.mkdir(os.path.split(os.path.abspath(sqlfile))[0])

    # The context manager guarantees the gzip stream is closed even when a
    # record fails to parse.
    with gzip.open(sqlfile, 'wb') as sql_fd:
        sql_fd.write(start_transaction())
        sql_fd.write(Drug.preuniprotdeleted())
        if newsource is not None:
            sql_fd.write(newsource.toSQL())

        for drg_line in drugbank_connect.localDrugs:
            sql_fd.write(Drug(inline=drg_line).toSQL())

        sql_fd.write(Drug.afteruniprotdeleted())
        sql_fd.write(end_transaction())
    if verbose:
        sys.stderr.write("End execution.\n")
Example #4
0
def Enzyme2SQL(database, sqlfile, skip_download, verbose):
    """Write the local Enzyme database as a single gzipped SQL file.

    :param database: local directory handed to ``Enzymelink``.
    :param sqlfile: path of the gzipped SQL file to create; its parent
        directory is created if needed.
    :param skip_download: when false, the database is downloaded first and
        an 'enzyme' source record is written at the top of the SQL file.
    :param verbose: when true, progress messages are written to stderr.
    """
    enzyme_connect = Enzymelink(local=database)
    newsource = None
    if not skip_download:
        if verbose:
            sys.stderr.write(
                "Downloading Enzyme database to {0} ...\n".format(database))
        enzyme_connect.download()
        newsource = Source(name='enzyme', source=enzyme_connect.source)
        if verbose:
            sys.stderr.write("Download Finished.\n")
    elif verbose:
        sys.stderr.write("Using previously downloaded database.\n")

    if verbose:
        sys.stderr.write("Parsing Enzyme.\n")
        sys.stderr.write("Writing {0} ....\n".format(sqlfile))
    Path.mkdir(os.path.split(os.path.abspath(sqlfile))[0])

    # Transfer statements are collected and written after every enzyme
    # record has been emitted.
    transfers = []
    # The context manager guarantees the gzip stream is closed even when a
    # record fails to parse.
    with gzip.open(sqlfile, 'wb') as sql_fd:
        sql_fd.write(start_transaction())
        if newsource is not None:
            sql_fd.write(newsource.toSQL())

        for enz_line in enzyme_connect.localEnzymes:
            newenz = Enzyme(inline=enz_line)
            sql_fd.write(newenz.toSQL())
            if newenz.has_transfers:
                transfers.append(newenz.transfered2SQL())

        sql_fd.write("".join(transfers))
        sql_fd.write(end_transaction())
    if verbose:
        sys.stderr.write("End execution.\n")
Example #5
0
def PDBeChem2SQL(database, sqlfile, skip_download, verbose):
    """Write the local PDBeChem database as a single gzipped SQL file.

    All chemical files are parsed before the output file is opened.  The
    output holds, in order: the optional source record, one record per entry
    of ``element_dic``, the chemicals without a parent, and finally the
    chemicals with a parent.

    :param database: local directory handed to ``PDBeChemlink``.
    :param sqlfile: path of the gzipped SQL file to create; its parent
        directory is created if needed.
    :param skip_download: when false, the database is downloaded first and a
        'PDBeChem' source record is written at the top of the SQL file.
    :param verbose: when true, progress messages are written to stderr.
    """
    pdbechem_connect = PDBeChemlink(local=database)
    newsource = None
    if not skip_download:
        if verbose:
            sys.stderr.write(
                "Downloading PDBeChem database to {0} ...\n".format(database))
        pdbechem_connect.download()
        newsource = Source(name='PDBeChem', source=pdbechem_connect.source)
        if verbose:
            sys.stderr.write("Download Finished.\n")
    elif verbose:
        sys.stderr.write("Using previously downloaded database.\n")

    noparent_chems = []
    parent_chems = []
    if verbose:
        sys.stderr.write("Parsing PDBeChem.\n")
    for chem_file in pdbechem_connect.localPDBeChems:
        if verbose:
            sys.stderr.write("\tReading {0} ....\n".format(chem_file))
        newchem = PDBeChem(chem_file)
        bucket = noparent_chems if newchem.parent is None else parent_chems
        bucket.append(newchem.toSQL())

    if verbose:
        sys.stderr.write("Writing {0} ....\n".format(sqlfile))
    Path.mkdir(os.path.split(os.path.abspath(sqlfile))[0])
    # The context manager guarantees the gzip stream is closed even when
    # writing fails part-way.
    with gzip.open(sqlfile, 'wb') as sql_fd:
        sql_fd.write(start_transaction())
        if newsource is not None:
            sql_fd.write(newsource.toSQL())
        for e in element_dic.values():
            newelement = Element(e.number, e.symbol, e.name)
            sql_fd.write(newelement.toSQL() + "\n")
        sql_fd.write("\n".join(noparent_chems) + "\n")
        sql_fd.write("\n".join(parent_chems) + "\n")
        sql_fd.write(end_transaction())
    if verbose:
        sys.stderr.write("End execution.\n")
Example #6
0
def PDBTM2SQL(database, sqlfile, skip_download, verbose):
    """Write the local PDBTM database as a single gzipped SQL file.

    :param database: local directory handed to ``PDBTMlink``.
    :param sqlfile: path of the gzipped SQL file to create; its parent
        directory is created if needed.
    :param skip_download: when false, a 'PDBTM' source record is written at
        the top of the SQL file (see the NOTE below about the download).
    :param verbose: when true, progress messages are written to stderr.
    """
    pdbtm_connect = PDBTMlink(local=database)
    newsource = None
    if not skip_download:
        if verbose:
            sys.stderr.write(
                "Downloading PDBTM database to {0} ...\n".format(database))
        # NOTE: the actual download call is disabled here; only the source
        # record is registered.
        # pdbtm_connect.download()
        # The source is registered under its own name; it used to be
        # labelled 'enzyme', a copy/paste slip that collides with the
        # Enzyme source record.
        # NOTE(review): confirm 'PDBTM' is the spelling the schema expects.
        newsource = Source(name='PDBTM', source=pdbtm_connect.source)
        if verbose:
            sys.stderr.write("Download Finished.\n")
    elif verbose:
        sys.stderr.write("Using previously downloaded database.\n")

    if verbose:
        sys.stderr.write("Parsing PDBTM.\n")
        sys.stderr.write("Writing {0} ....\n".format(sqlfile))
    Path.mkdir(os.path.split(os.path.abspath(sqlfile))[0])

    # The context manager guarantees the gzip stream is closed even when a
    # record fails to parse.
    with gzip.open(sqlfile, 'wb') as sql_fd:
        sql_fd.write(start_transaction())
        sql_fd.write(TM.prepdbdeleted())
        if newsource is not None:
            sql_fd.write(newsource.toSQL())

        sql_fd.write(TM.regions2SQL())
        for line in pdbtm_connect.localTM:
            sql_fd.write(TM(inline=line).toSQL())

        sql_fd.write(TM.afterpdbdeleted())
        sql_fd.write(end_transaction())
    if verbose:
        sys.stderr.write("End execution.\n")
Example #7
0
def SCOP2SQL(database, sqlfile, skip_download, verbose):
    """Write the local SCOP database as a single gzipped SQL file.

    :param database: local directory handed to ``SCOPlink``.
    :param sqlfile: path of the gzipped SQL file to create; its parent
        directory is created if needed.
    :param skip_download: when false, the database is downloaded first and a
        'SCOP' source record is written at the top of the SQL file.
    :param verbose: when true, progress messages are written to stderr.
    """
    scop_connect = SCOPlink(local=database)
    newsource = None
    if not skip_download:
        if verbose:
            sys.stderr.write(
                "Downloading SCOP database to {0} ...\n".format(database))
        scop_connect.download()
        # The source is registered under its own name; it used to be
        # labelled 'enzyme', a copy/paste slip that collides with the
        # Enzyme source record.
        # NOTE(review): confirm 'SCOP' is the spelling the schema expects.
        newsource = Source(name='SCOP', source=scop_connect.source)
        if verbose:
            sys.stderr.write("Download Finished.\n")
    elif verbose:
        sys.stderr.write("Using previously downloaded database.\n")

    if verbose:
        sys.stderr.write("Parsing SCOP.\n")
        sys.stderr.write("Writing {0} ....\n".format(sqlfile))
    Path.mkdir(os.path.split(os.path.abspath(sqlfile))[0])

    # The context manager guarantees the gzip stream is closed even when
    # parsing fails.
    with gzip.open(sqlfile, 'wb') as sql_fd:
        sql_fd.write(start_transaction())
        if newsource is not None:
            sql_fd.write(newsource.toSQL())

        # Descriptions are loaded before relations, then the whole SCOP
        # object is serialised at once.
        scop_obj = SCOP()
        for line in scop_connect.descriptions:
            scop_obj.add_description(line.strip())
        for line in scop_connect.relations:
            scop_obj.add_relation(line.strip())

        sql_fd.write(SCOP.prepdbdeleted())
        sql_fd.write(scop_obj.toSQL())
        sql_fd.write(SCOP.afterpdbdeleted())

        sql_fd.write(end_transaction())
    if verbose:
        sys.stderr.write("End execution.\n")
Example #8
0
def CDhit2SQL(database, sqlfile, verbose):
    """Write the CD-HIT data loaded from *database* as a gzipped SQL file.

    :param database: input handed to the ``CDhit`` constructor.
    :param sqlfile: path of the gzipped SQL file to create; its parent
        directory is created if needed.
    :param verbose: when true, progress messages are written to stderr.
    """
    if verbose:
        sys.stderr.write("Retrieving data from {0} ...\n".format(database))
    cdhit = CDhit(database)

    if verbose:
        sys.stderr.write("Writing {0} ....\n".format(sqlfile))
    Path.mkdir(os.path.split(os.path.abspath(sqlfile))[0])
    # The context manager guarantees the gzip stream is closed even when
    # serialisation fails.
    with gzip.open(sqlfile, 'wb') as sql_fd:
        sql_fd.write(start_transaction())
        sql_fd.write(cdhit.toSQL())
        sql_fd.write(end_transaction())
    if verbose:
        sys.stderr.write("End execution.\n")
def enrichment2SQL(infile, table, rel, verbose):
    """Print INSERT statements for a whitespace-separated enrichment file.

    Each non-blank line of *infile* is split on whitespace and written to
    stdout as one ``INSERT INTO <table> VALUES (...)`` statement using
    columns 0, 1, 7, 2, 8, 9 and 10 (in that order).  The whole output is
    wrapped in a transaction.

    :param infile: path of the text file to read.
    :param table: name of the target SQL table.
    :param rel: relation kind; for 'scop' and 'go' column 1 is emitted
        unquoted, otherwise it is wrapped in single quotes.
    :param verbose: when true, progress messages are written to stderr.
    :raises IndexError: if a non-blank line has fewer than 11 columns.
    """
    if verbose:
        sys.stderr.write("Retrieving data from {0} ...\n".format(infile))
        sys.stderr.write("\tTo table {0} ...\n".format(table))
    start_command = "INSERT INTO {0} VALUES ".format(table)
    quote_second = rel not in ('scop', 'go')

    sys.stdout.write(start_transaction() + "\n")
    # ``with`` makes sure the input file is closed (it used to be leaked).
    with open(infile) as fd:
        for line in fd:
            d = line.strip().split()
            if not d:
                # Blank lines carry no record; skip them instead of failing.
                continue
            if quote_second:
                d[1] = "'" + d[1] + "'"
            sys.stdout.write(
                "{0} ({1[0]},{1[1]},'{1[7]}',{1[2]},'{1[8]}','{1[9]}','{1[10]}');\n"
                .format(start_command, d))
    sys.stdout.write(end_transaction())

    if verbose:
        sys.stderr.write("End execution.\n")
Example #10
0
def Uniprot2SQL(database, sqlfile, skip_download, verbose):
    """Write the local Uniprot database as a series of gzipped SQL files.

    The output is split over several numbered files: every ``_`` in
    *sqlfile* is replaced by a zero-padded three-digit counter starting at
    1, and a new file is started once the current one has received more than
    500000 records.  Each file is a self-contained transaction.

    :param database: local directory handed to ``Uniprotlink``.
    :param sqlfile: output path template (see above); its parent directory
        is created if needed.
    :param skip_download: when false, the database is downloaded first and a
        'uniprot' source record is written at the top of the first file.
    :param verbose: when true, progress messages are written to stderr.
    """
    uniprot_connect = Uniprotlink(local=database)
    newsource = None

    # Honour the function argument rather than a module-level ``options``
    # object, which callers of this function cannot control.
    if not skip_download:
        if verbose:
            sys.stderr.write(
                "Downloading Uniprot database to {0} ...\n".format(database))
        uniprot_connect.download()
        newsource = Source(name='uniprot', source=uniprot_connect.source)
        if verbose:
            sys.stderr.write("Download Finished.\n")
    elif verbose:
        sys.stderr.write("Using previously downloaded database.\n")

    file_counter = 1
    file_sequence = 0
    file_sql_name = sqlfile.replace('_', '{0:03}')

    if verbose:
        sys.stderr.write("Parsing Uniprot.\n")
        sys.stderr.write("Writing {0} ....\n".format(
            file_sql_name.format(file_counter)))
    Path.mkdir(os.path.split(os.path.abspath(sqlfile))[0])
    sql_fd = gzip.open(file_sql_name.format(file_counter), 'wb')
    try:
        sql_fd.write(start_transaction())
        if newsource is not None:
            sql_fd.write(newsource.toSQL())

        for uni_line in uniprot_connect.localUniprots:
            newuni = Uniprot(inline=uni_line)
            if file_sequence > 500000:
                # Current file is full: finish it and rotate to the next.
                sql_fd.write(end_transaction())
                sql_fd.close()
                file_sequence = 0
                file_counter += 1
                if verbose:
                    sys.stderr.write("Writing {0} ....\n".format(
                        file_sql_name.format(file_counter)))
                sql_fd = gzip.open(file_sql_name.format(file_counter), 'wb')
                sql_fd.write(start_transaction())

            sql_fd.write(newuni.toSQL())
            file_sequence += 1

        # Terminate the transaction of the last file; without this the final
        # file was left without its closing statement.
        sql_fd.write(end_transaction())
    finally:
        # Always flush and close the gzip stream, otherwise the last file is
        # truncated.
        sql_fd.close()
    if verbose:
        sys.stderr.write("End execution.\n")
Example #11
0
def GO2SQL(database, sqlfile, skip_download, verbose):
    """Write the local GO database as a single gzipped SQL file.

    Term records are written first; the relation statements and then the
    parent statements of the terms that have any follow afterwards.

    :param database: local directory handed to ``GOlink``.
    :param sqlfile: path of the gzipped SQL file to create; its parent
        directory is created if needed.
    :param skip_download: when false, the database is downloaded first and a
        'GO' source record is written at the top of the SQL file.
    :param verbose: when true, progress messages are written to stderr.
    """
    go_connect = GOlink(local=database)
    newsource = None
    if not skip_download:
        if verbose:
            sys.stderr.write(
                "Downloading GO database to {0} ...\n".format(database))
        go_connect.download()
        newsource = Source(name='GO', source=go_connect.source)
        if verbose:
            sys.stderr.write("Download Finished.\n")
    elif verbose:
        sys.stderr.write("Using previously downloaded database.\n")

    with_parents = []
    with_relations = []

    if verbose:
        sys.stderr.write("Parsing GO.\n")
        sys.stderr.write("Writing {0} ....\n".format(sqlfile))
    Path.mkdir(os.path.split(os.path.abspath(sqlfile))[0])
    # The context manager guarantees the gzip stream is closed even when a
    # term fails to parse.
    with gzip.open(sqlfile, 'wb') as sql_fd:
        sql_fd.write(start_transaction())
        if newsource is not None:
            sql_fd.write(newsource.toSQL())

        for go_line in go_connect.localGOs:
            newGO = GOterm(inline=go_line)
            sql_fd.write(newGO.toSQL() + "\n")
            if len(newGO.relations) > 0:
                with_relations.append(newGO)
            if len(newGO.parents) > 0:
                with_parents.append(newGO)

        # Cross-references are emitted only after every term record.
        for go_term in with_relations:
            sql_fd.write(go_term.relations2SQL() + "\n")
        for go_term in with_parents:
            sql_fd.write(go_term.parents2SQL() + "\n")

        sql_fd.write(end_transaction())
    if verbose:
        sys.stderr.write("End execution.\n")
Example #12
0
def PDB2SQL(database, seqdatabase, listfiles, sqlfile, skip_download, verbose):
    """Write one gzipped SQL file per local PDB file.

    Output files go to ``<sqlfile>/<chars 1-2 of the PDB id, lowercased>/
    <id>.sql.gz``.  A failure on one PDB file is logged (with traceback) and
    processing continues with the next file.

    :param database: local PDB directory handed to ``PDBlink``.
    :param seqdatabase: PDBseq directory handed to ``PDBlink``.
    :param listfiles: optional path of a text file listing, one per line,
        the PDB files to process; ``None`` processes every local file.  The
        error log is ``<listfiles>.log`` when given, else ``PDB2SQL.log`` in
        the current directory.
    :param sqlfile: output location; it is used as a *directory*.
    :param skip_download: when false, the PDB is synchronised, PDBseq is
        rebuilt and a 'PDB' source record is written to
        ``<sqlfile>/00/0000.sql.gz``.
    :param verbose: when true, progress messages are written to stderr.
    """
    import traceback

    pdb_connect = PDBlink(local=database, PDBseq=seqdatabase)

    if not skip_download:
        if verbose:
            sys.stderr.write(
                "Syncronizing PDB database to {0} ...\n".format(database))
        pdb_connect.sync_PDB(log_file=os.path.join(database, 'PDB.sync.log'))
        newsource = Source(name='PDB', source=pdb_connect.source)
        if verbose:
            sys.stderr.write(
                "Creating PDBseq in {0} ...\n".format(seqdatabase))
        pdb_connect.make_PDBseq(
            log_file=os.path.join(seqdatabase, 'PDB.seq.log'))
        if verbose:
            sys.stderr.write("Download Finished.\n")
        outdir = os.path.abspath(os.path.join(sqlfile, '00'))
        Path.mkdir(outdir)
        with gzip.open(os.path.join(outdir, '0000.sql.gz'), 'wb') as sql_fd:
            sql_fd.write(start_transaction())
            sql_fd.write(newsource.toSQL())
            sql_fd.write(end_transaction())
    elif verbose:
        sys.stderr.write("Using previously downloaded database.\n")

    files2check = set()
    if listfiles is not None:
        with open(listfiles) as fd:
            files2check.update(line.strip() for line in fd)
        logname = listfiles + ".log"
    else:
        logname = "PDB2SQL.log"

    # ``with`` guarantees the log is flushed and closed (it was never closed
    # before, so trailing tracebacks could be lost).
    with open(logname, "w") as logfd:
        for pdbfile in pdb_connect.localPDBs:
            if listfiles is not None and pdbfile not in files2check:
                # An empty list means there is nothing to do at all.
                if not files2check:
                    break
                continue
            try:
                if verbose:
                    sys.stderr.write("Working file {0}\n".format(pdbfile))
                newPDB = PDB(pdb_file=pdbfile)
                outsqldir = os.path.join(sqlfile, newPDB.id[1:3].lower())
                Path.mkdir(outsqldir)
                outsqlfile = os.path.join(outsqldir, newPDB.id + '.sql.gz')
                if verbose:
                    sys.stderr.write(
                        "\tOutput SQL file is {0}.\n".format(outsqlfile))
                with gzip.open(outsqlfile, 'wb') as sql_fd:
                    sql_fd.write(start_transaction())
                    sql_fd.write(PDB.preuniprotdeleted())
                    sql_fd.write(newPDB.toSQL())
                    sql_fd.write(PDB.afteruniprotdeleted())
                    sql_fd.write(end_transaction())
            except Exception:
                # Deliberate best-effort: one broken PDB must not abort the
                # whole run.  KeyboardInterrupt and SystemExit are not
                # subclasses of Exception and therefore still propagate.
                if verbose:
                    sys.stderr.write("\tAn error occurred. Check log file\n")
                SBIglobals.alert(
                    'error', None,
                    '\tAn error occurred for {0} . Check log file'.format(
                        pdbfile))
                logfd.write("FILE {0}\n".format(pdbfile))
                logfd.write(traceback.format_exc())
                logfd.write("\n")
Example #13
0
def DS2SQL(database, looplist, sqlfile, verbose):
    """Convert every classification file in *database* to gzipped SQL.

    For each file listed by ``Path.list_files(database)`` the subclass type
    is taken from the second dot-separated field of the file name, the file
    is parsed into a ``Cclass`` object and the result is written to
    ``<sqlfile>/<subclasstype>.sql.gz`` inside a transaction.

    :param database: directory with the classification text files.
    :param looplist: passed to ``readlist`` together with the subclass type.
    :param sqlfile: output *directory*; created if needed.
    :param verbose: when true, progress messages are written to stderr.
    """
    Path.mkdir(sqlfile)
    for dsfile in Path.list_files(database):
        subclasstype = os.path.split(dsfile)[-1].split('.')[1]
        classification = Cclass(subclasstype)
        if verbose:
            sys.stderr.write(
                "Retrieving data for subclass {0} ...\n".format(subclasstype))
        loops = readlist(looplist, subclasstype)
        outname = os.path.join(sqlfile, subclasstype + '.sql.gz')
        # Context managers close both files even when parsing fails.
        with gzip.open(outname, 'wb') as sql_fd:
            sql_fd.write(start_transaction())
            with open(dsfile) as sql_in:
                read = False
                # Reset per file so that a data line appearing before any
                # section header is ignored instead of raising a NameError
                # (or silently reusing the state of the previous file).
                parse_mode, counter = None, 0
                for line in sql_in:
                    dataline = line.rstrip('\n')
                    # Skip separators, blank lines and the protein-code
                    # header.
                    if not line.strip() or line.startswith(
                            ('==', '***',
                             '---- P R O T E I N    C O D E  ----')):
                        continue
                    if line.startswith(
                            'CONSENSUS & MULTIPLE ALIGNEMENT IN THE'):
                        # Start of a new subclass section.
                        data = line.split(':')[-1].strip().split()
                        classification.subclasses = Subclass(
                            (data[0].strip(), data[3].strip()), data[4])
                        workscls = classification.lastsubclass
                        read = True
                        continue
                    if line.startswith('GLOBAL STATISTICS'):
                        read = False
                        continue
                    if not read:
                        continue

                    if line.startswith(
                            '        SEQUENCE   ALIGNEMENT                           :'
                    ):
                        parse_mode, counter = 'P', 0
                    elif line.startswith(
                            '       ACCESSIBLE SURFACE ALIGNEMENT                    :'
                    ):
                        parse_mode, counter = 'E', 0
                    elif line.startswith(
                            '           RAMACHANDRAN                                 :'
                    ):
                        parse_mode, counter = 'R', 0
                    elif line.startswith(
                            '        SECONDARY STRUCTURE                             :'
                    ):
                        parse_mode, counter = 'S', 0
                    elif line.startswith(
                            '--------- CONSENSUS THORNTON       :'):
                        workscls.add_consensus(dataline, 'DS', loops)
                    elif line.startswith('--------- CONSENSUS TOPOLOGY'):
                        workscls.add_topology(dataline, 'DS')
                    elif line.startswith('CENTROIDE POLAR COORD.   :'):
                        workscls.add_coordinates(dataline)
                    elif line.startswith(
                            '--------- RAMACHANDRAN PATTERN     :'):
                        workscls.ram_pat = re.sub(
                            r'\(X\)', '',
                            dataline.split(':')[1].strip().strip('.'))
                    elif line.startswith(
                            '--------- SEQUENCE  PATTERN        :'):
                        workscls.seq_pat = re.sub(
                            r'\(X\)', '',
                            dataline.split(':')[1].strip().strip('.'))
                    elif line.startswith(
                            '--------- BURIAL    PATTERN        :'):
                        workscls.exp_pat = re.sub(
                            r'\(X\)', '',
                            dataline.split(':')[1].strip().strip('.'))
                    elif line.startswith('                             '
                                         ) and len(dataline) < 400:
                        # Per-loop data line; its meaning depends on the
                        # section header seen last.  In modes E/R/S the
                        # counter indexes the loops collected in mode P.
                        if parse_mode == 'P':
                            workscls.loops = Loop(info=dataline)
                        elif parse_mode == 'E':
                            workscls.loops[counter].add_surface(
                                info=dataline)
                            counter += 1
                        elif parse_mode == 'R':
                            workscls.loops[counter].add_ramachandran(
                                info=dataline)
                            counter += 1
                        elif parse_mode == 'S':
                            workscls.loops[counter].add_secondary_str(
                                info=dataline)
                            counter += 1

            sql_fd.write(classification.toSQL('DS'))
            sql_fd.write(end_transaction())

    if verbose:
        sys.stderr.write("End execution.\n")