def bisect(args):
    """
    %prog bisect acc accession.fasta

    determine the version of the accession by querying entrez, based on a fasta
    file. This proceeds by a sequential search from xxxx.1 to the latest record.
    """
    # NOTE: the docstring above is handed to OptionParser and doubles as the
    # command-line usage text, so its wording is part of the program output.
    p = OptionParser(bisect.__doc__)
    p.set_email()
    opts, args = p.parse_args(args)

    # Exactly two positionals are required: the accession and a FASTA file.
    if len(args) != 2:
        sys.exit(not p.print_help())

    acc, fastafile = args
    arec = get_first_rec(fastafile)

    valid = None
    # Despite the function name this is a linear scan over versions .1 .. .99.
    for i in range(1, 100):
        term = "%s.%d" % (acc, i)
        try:
            query = list(batch_entrez([term], email=opts.email))
        except AssertionError:
            query = []

        # An empty result is treated like the AssertionError case: there is no
        # such version, so stop instead of failing on query[0] with IndexError.
        if not query:
            logging.debug("no records found for %s. terminating.", term)
            return

        # The first hit is unpacked as (id, term, handle); the id is unused
        # and is deliberately not bound to the builtin name `id`.
        _uid, term, handle = query[0]
        brec = next(SeqIO.parse(handle, "fasta"))

        match = print_first_difference(
            arec, brec, ignore_case=True, ignore_N=True, rc=True
        )
        if match:
            valid = term
            break

    # Only report when some version matched; otherwise return silently.
    if valid:
        printf()
        printf(
            "[green]{} matches the sequence in `{}`".format(valid, fastafile)
        )
def bisect(args):
    """
    %prog bisect acc accession.fasta

    determine the version of the accession by querying entrez, based on a fasta
    file. This proceeds by a sequential search from xxxx.1 to the latest record.
    """
    # NOTE: the docstring above is handed to OptionParser and doubles as the
    # command-line usage text, so its wording is part of the program output.
    p = OptionParser(bisect.__doc__)
    p.set_email()
    opts, args = p.parse_args(args)

    # Exactly two positionals are required: the accession and a FASTA file.
    if len(args) != 2:
        sys.exit(not p.print_help())

    acc, fastafile = args
    arec = get_first_rec(fastafile)

    valid = None
    # Despite the function name this is a linear scan over versions .1 .. .99.
    for i in range(1, 100):
        term = "%s.%d" % (acc, i)
        try:
            query = list(batch_entrez([term], email=opts.email))
        except AssertionError:
            query = []

        # An empty result is treated like the AssertionError case: there is no
        # such version, so stop instead of failing on query[0] with IndexError.
        if not query:
            logging.debug("no records found for %s. terminating.", term)
            return

        # The first hit is unpacked as (id, term, handle); the id is unused
        # and is deliberately not bound to the builtin name `id`.
        _uid, term, handle = query[0]
        # The builtin next() works on Python 2.6+ and 3; the iterator's
        # .next() method exists only on Python 2.
        brec = next(SeqIO.parse(handle, "fasta"))

        match = print_first_difference(
            arec, brec, ignore_case=True, ignore_N=True, rc=True
        )
        if match:
            valid = term
            break

    # Only report when some version matched; otherwise return silently.
    if valid:
        # Single-argument print(...) calls behave the same under the Python 2
        # print statement and the Python 3 print function.
        print("")
        print(green("%s matches the sequence in `%s`" % (valid, fastafile)))