Ejemplo n.º 1
0
def getDataFromUniprot(fasta, accs):
    """Load the UniProt XML cache stored next to *fasta* into Protein objects.

    The cache file is ``fasta + ".uniprot"``.  If its last line is not the
    closing ``</uniprot>`` tag, the tag is appended before parsing.

    Args:
        fasta: Path of the FASTA file; only used to derive the cache path
            and as the argument passed on to ``fetchUniprotEntries``.
        accs: Accessions that are expected to be present in the cache, or
            ``None`` to skip the completeness check.

    Returns:
        A dict mapping ``record.name`` to ``Protein(xmlrecord=record)`` for
        every record yielded by ``UniprotIterator``.  When some of *accs*
        are missing from that dict, the missing ones are fetched (only if
        the module-level ``fetchme`` flag is truthy) and the result of
        re-reading the cache with ``accs=None`` is returned instead.
    """
    global fetchme  # only read here; declared to make the dependency explicit
    uniprot_file = fasta + ".uniprot"
    print(uniprot_file)

    # NOTE(review): tail() is defined elsewhere; it is assumed to return a
    # sequence holding the last line of the file -- confirm its behaviour on
    # an empty file, where the [0] below would fail.
    lastline = tail(uniprot_file, 1)
    lastline = lastline[0]
    if lastline:
        lastline = lastline.rstrip()
        print("::::::::::: " + lastline)
        if lastline != "</uniprot>":
            # Restore the closing root tag before handing the file to the
            # XML iterator below.
            with open(uniprot_file, "a") as f:
                f.write("</uniprot>\n")

    proteins = {}
    # The iterator is fully consumed inside the ``with`` so the handle is
    # closed deterministically instead of being leaked.
    with open(uniprot_file) as handle:
        for record in UniprotIterator(handle):
            print("%s %s" % (record.id, record.name))
            proteins[record.name] = Protein(xmlrecord=record)

    if accs is not None:
        # Membership is tested against the dict itself (hash lookup) rather
        # than against a freshly built key list for every accession.
        accs2get = [acc for acc in accs if acc not in proteins]
        if accs2get:
            print("fetching: ")
            print(accs2get)
            if fetchme:
                fetchUniprotEntries(fasta, accs2get)
            # Re-read the cache; accs=None prevents a second fetch round.
            return getDataFromUniprot(fasta, None)

    print("There are total of " + str(len(proteins)) + " entries mapped in uniprot")
    return proteins
Ejemplo n.º 2
0
def fetchUniprotEntries(fasta, newaccs=None):
    """Fetch UniProt XML entries in batches and append them to the cache file.

    The cache file is ``fasta + ".uniprot"``.  If it already exists and ends
    with ``</uniprot>``, that last line is removed so new entries can be
    appended inside the root element.  Accessions are requested 50 at a
    time; the XML prolog / opening ``<uniprot ...>`` line is kept only for
    the first chunk written to a fresh file, and the closing tag is kept
    only for the last chunk.  A chunk that fails is reported and skipped.

    Args:
        fasta: Path of the FASTA file the cache belongs to.
        newaccs: Accessions to fetch.  When ``None`` they are obtained from
            ``getACCsFromFastaFile(fasta)``.

    Returns:
        The accessions that were requested (``newaccs``, or the value
        returned by ``getACCsFromFastaFile``).
    """
    print("Fetching uniprot entries..")
    uniprot_file = fasta + ".uniprot"
    already_exists = False
    if os.path.isfile(uniprot_file):
        already_exists = True
        # NOTE(review): tail() is defined elsewhere; assumed to return a
        # sequence holding the last line -- confirm behaviour on empty files.
        lastline = tail(uniprot_file, 1)
        lastline = lastline[0]
        lastline = lastline.rstrip()

        if lastline == "</uniprot>":
            # Drop the closing root tag so that appended entries stay inside
            # the root element.  Both handles are closed explicitly so the
            # rewrite is flushed before anything is appended.
            with open(uniprot_file, 'r') as f:
                lines = f.readlines()
            del lines[-1]
            with open(uniprot_file, 'w') as f:
                f.writelines(lines)

    fasta_ids = newaccs
    if newaccs is None:
        fasta_ids = getACCsFromFastaFile(fasta)
    print(fasta_ids)
    if fasta_ids:
        indices = list(range(len(fasta_ids)))
        chunks = FetchUtils.getChunks(indices, 50)
        last_index = len(chunks) - 1
        # Track whether the file already carries an XML header.  Keying this
        # on "chunk 0" alone would leave a fresh file without any header if
        # the first chunk failed and a later one succeeded.
        header_written = already_exists
        for i, chunk in enumerate(chunks):
            print("Fetching " + str(i) + " chunk out of " + str(len(chunks)))
            sys.stdout.flush()
            accs = [fasta_ids[j] for j in chunk]
            # fetchBatch <dbName> <id1,id2,...> [formatName [styleName]] [options...]
            try:
                uniprots = FetchUtils.returnFetchBatch('uniprot', ','.join(accs), 'xml', 'default')
                if header_written:
                    uniprots = re.sub(r"\<uniprot.+", '', uniprots)
                    uniprots = re.sub(r"\<\?xml version=.+\>", '', uniprots)
                if i != last_index:
                    uniprots = re.sub(r"\</uniprot.*>", '', uniprots)
                with open(uniprot_file, "a") as f:
                    f.write(uniprots)
                header_written = True
            except Exception as err:
                # Best effort: report and move on to the next chunk, but do
                # not swallow KeyboardInterrupt / SystemExit.
                print("Coundnot fetch this chunk")
                print(repr(err))
            else:
                print("succeeded in fetching chunk")
    return fasta_ids