def main():
    # parse options
    option, args = doc_optparse.parse(__doc__)

    if len(args) < 2:
        doc_optparse.exit()

    # try opening the file both ways, in case the arguments got confused
    try:
        gff_file = gff.input(args[1])
        twobit_file = twobit.input(args[0])
    except Exception:
        gff_file = gff.input(args[0])
        twobit_file = twobit.input(args[1])

    for record in gff_file:
        if record.seqname.startswith("chr"):
            chr = record.seqname
        else:
            chr = "chr" + record.seqname

        ref_seq = twobit_file[chr][(record.start - 1):record.end]

        if option.diff:
            if "ref_allele" in record.attributes:
                if record.attributes["ref_allele"].strip("\"") == ref_seq.upper():
                    continue

        record.attributes["ref_allele"] = ref_seq.upper()
        print record
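# A note on coordinates: GFF records are 1-based and inclusive, while the
# twobit sequence object is sliced with 0-based, half-open indices, hence
# the (record.start - 1) above. A minimal, self-contained sketch of the
# conversion (illustrative helper, not part of the module):
def gff_to_slice(start, end):
    """Convert 1-based inclusive GFF coordinates to a 0-based half-open slice."""
    return start - 1, end

assert gff_to_slice(100, 102) == (99, 102)  # covers bases 100, 101 and 102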
def main():
    # parse options
    option, args = doc_optparse.parse(__doc__)

    if len(args) < 2:
        doc_optparse.exit()  # error
    elif len(args) < 3:
        out = match2dbSNP(args[0], args[1])
        for line in out:
            print line
    else:
        match2dbSNP_to_file(args[0], args[1], args[2])
def main(): """Match a GFF file against JSON-formatted GET-Evidence data""" # parse options option, args = doc_optparse.parse(__doc__) if len(args) < 2: doc_optparse.exit() # Error elif len(args) < 3: out = match_getev(args[0], args[1]) for line in out: print line else: match_getev_to_file(args[0], args[1], args[2])
def main():
    # parse options; exit if we don't have the correct arguments
    option, args = doc_optparse.parse(__doc__)

    if len(args) < 3:
        doc_optparse.exit()  # error
    elif len(args) < 4:
        out = predict_nonsynonymous(args[0], args[1], args[2])
        for line in out:
            print line
    else:
        predict_nonsynonymous_to_file(args[0], args[1], args[2], args[3])
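# The three mains above share one convention: each filter exposes a
# generator form (yields output lines, printed to stdout here) and a
# *_to_file form that writes the same lines to a named file. The wrappers
# are not shown in this section; a minimal sketch of the assumed pattern,
# under an illustrative name:
def _generator_to_file(generator, output_path):
    f = open(output_path, 'w')
    try:
        for line in generator:
            print >> f, line
    finally:
        f.close()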
def main():
    # parse options
    option, args = doc_optparse.parse(__doc__)

    if len(args) < 1:
        doc_optparse.exit()

    # first, try to connect to the database
    try:
        connection = MySQLdb.connect(host=DB_HOST, user=GENOTYPE_USER,
                                     passwd=GENOTYPE_PASSWD, db=GENOTYPE_DATABASE)
        cursor = connection.cursor()
    except MySQLdb.OperationalError, message:
        print "Error %d while connecting to database: %s" % (message[0], message[1])
        sys.exit()
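# The fragment above only establishes the connection. Downstream queries
# would go through the cursor; for illustration only (the table and column
# names here are hypothetical, not the real genotype schema):
#
#     cursor.execute("SELECT chromosome, position FROM variants WHERE rsid = %s",
#                    ("rs1234",))
#     for row in cursor.fetchall():
#         print row
#     cursor.close()
#     connection.close()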
def main():
    # parse options
    option, args = doc_optparse.parse(__doc__)

    if len(args) < 2:
        doc_optparse.exit()

    gff_files_1 = glob.glob(args[0])
    gff_files_2 = glob.glob(args[1])

    # create temporary files to store intersections
    temp_file_1 = TemporaryFile()
    temp_file_2 = TemporaryFile()

    if not option.enumerate:
        # use a wider column if we're going to need it
        if option.read_depth:
            col_width = 24
        elif option.verbose:
            col_width = 16
        else:
            col_width = 8

        # print column headings
        print " " * 8,
        for i in range(1, len(gff_files_1) + 1):
            print excel_column(i).ljust(col_width),
        print ""

    # initialize counter to print row headings
    file_number = 0

    # iterate through the second list of files
    for g2_path in gff_files_2:
        # print row heading
        if not option.enumerate:
            file_number += 1
            print str(file_number).ljust(8),

        # now iterate through the first list, do intersections and compare
        for g1_path in gff_files_1:
            # do the intersection one way
            g1 = gff.input(g1_path)
            g2 = gff.input(g2_path)
            for line in g1.intersect(g2):
                print >> temp_file_1, line

            # now do the intersection the other way
            g1_reverse = gff.input(g1_path)
            g2_reverse = gff.input(g2_path)
            for line in g2_reverse.intersect(g1_reverse):
                print >> temp_file_2, line

            # rewind each temporary file now storing intersection data
            temp_file_1.seek(0)
            temp_file_2.seek(0)

            # now go through the temporary files and work out concordance
            g1_intx = gff.input(temp_file_1)
            g2_intx = gff.input(temp_file_2)

            matching_count = unmatching_count = 0
            # we cannot chain equal signs here, because the two names would
            # reference the same list, and that would be bad...
            matching_read_depths, unmatching_read_depths = [], []

            for record1 in g1_intx:
                record2 = g2_intx.next()

                # these records should match in terms of the interval they represent
                if record2.seqname != record1.seqname or \
                   record2.start != record1.start or \
                   record2.end != record1.end:
                    raise ValueError("files must be pre-sorted")

                # isolate the read depth info if we need to
                if option.read_depth:
                    rd = []
                    try:
                        rd.append(int(record1.attributes["read_depth"].strip("\"")))
                    except KeyError:
                        pass
                    try:
                        rd.append(int(record2.attributes["read_depth"].strip("\"")))
                    except KeyError:
                        pass

                # now test if there's concordance
                try:
                    if sorted(record2.attributes["alleles"].strip("\"").split("/")) != \
                       sorted(record1.attributes["alleles"].strip("\"").split("/")):
                        unmatching_count += 1
                        if option.enumerate:
                            record1.attributes["concordant"] = "false"
                            record2.attributes["concordant"] = "false"
                            print record1
                            print record2
                        if option.read_depth:
                            unmatching_read_depths.extend(rd)
                    else:
                        matching_count += 1
                        if option.enumerate:
                            record1.attributes["concordant"] = "true"
                            record2.attributes["concordant"] = "true"
                            print record1
                            print record2
                        if option.read_depth:
                            matching_read_depths.extend(rd)
                # no alleles? not a SNP
                except KeyError:
                    continue

            # now we print the result, being mindful of possible zero
            # division problems, etc.
            if option.enumerate:
                pass
            elif option.read_depth:
                try:
                    a = "%.1f" % mean(matching_read_depths)
                    b = "%.1f" % median(matching_read_depths)
                except TypeError:
                    a = "--"
                    b = "--"
                try:
                    c = "%.1f" % mean(unmatching_read_depths)
                    d = "%.1f" % median(unmatching_read_depths)
                except TypeError:
                    c = "--"
                    d = "--"
                print ("%s %s : %s %s" % (a, b, c, d)).ljust(col_width),
            else:
                try:
                    p = "%.1f%%" % (float(matching_count) /
                                    (matching_count + unmatching_count) * 100)
                except ZeroDivisionError:
                    p = "--"
                if option.verbose:
                    total_count = unmatching_count + matching_count
                    print ("%s %s/%s" % (p, matching_count, total_count)).ljust(col_width),
                else:
                    print p.ljust(col_width),

            # now we rewind, delete everything, and start again!
            temp_file_1.seek(0)
            temp_file_1.truncate()
            temp_file_2.seek(0)
            temp_file_2.truncate()

        # wrap up the line
        print ""

    # print the legend describing what the column and row headings mean
    if not option.enumerate:
        print "-" * 8
        file_number = 0
        for i in gff_files_1:
            file_number += 1
            print ("[%s]" % excel_column(file_number)).ljust(8),
            print i
        file_number = 0
        for i in gff_files_2:
            file_number += 1
            print ("[%s]" % file_number).ljust(8),
            print i
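# Helpers referenced above but not defined in this section. Plausible
# sketches, inferred from usage: excel_column() labels columns A, B, ...,
# Z, AA, AB, ...; mean() and median() return None on empty input, so the
# "%.1f" % ... formatting raises the TypeError caught above.
def excel_column(n):
    """Map a 1-based column number to a spreadsheet-style letter label."""
    label = ""
    while n > 0:
        n, r = divmod(n - 1, 26)
        label = chr(ord('A') + r) + label
    return label

def mean(values):
    if not values:
        return None
    return sum(values) / float(len(values))

def median(values):
    if not values:
        return None
    v = sorted(values)
    mid = len(v) // 2
    if len(v) % 2:
        return float(v[mid])
    return (v[mid - 1] + v[mid]) / 2.0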
def main():
    # parse options
    option, args = doc_optparse.parse(__doc__)

    if option.stderr:
        sysin = sys.stdin.fileno()
        sysout = sys.stdout.fileno()
        syserr = sys.stderr.fileno()
        newout = file(option.stderr, 'a+', 0)
        sys.stderr.flush()
        sys.stdout.flush()
        os.close(sysin)
        os.close(sysout)
        os.dup2(newout.fileno(), sysout)
        os.close(sys.stderr.fileno())
        os.dup2(newout.fileno(), syserr)

    if option.pidfile:
        file(option.pidfile, 'w+').write("%d\n" % os.getpid())

    # deal with the trackback option
    if option.trackback:
        if len(args) < 4:
            doc_optparse.exit()
        url = args[0]
        path = args[1]
        kind = args[2]
        request_token = args[3]
        params = urllib.urlencode({'path': path,
                                   'kind': kind,
                                   'request_token': request_token})
        trackback(url, params)
        return

    # otherwise, figure out the host and port
    host = option.host or "localhost"
    port = int(option.port or 8080)

    # create server
    server = xrs((host, port))
    server.register_introspection_functions()

    def submit(genotype_file, coverage_file='', username=None, password=None):
        # get genotype file
        r = urllib2.Request(genotype_file)
        if username is not None:
            h = "Basic %s" % base64.encodestring('%s:%s' % (username, password)).strip()
            r.add_header("Authorization", h)
        handle = urllib2.urlopen(r)

        # write it to a temporary location while calculating its hash
        s = hashlib.sha1()
        output_handle, output_path = mkstemp()
        for line in handle:
            os.write(output_handle, line)
            s.update(line)
        os.close(output_handle)

        # now figure out where to store the file permanently
        permanent_dir = os.path.join(UPLOAD_DIR, s.hexdigest())
        permanent_file = os.path.join(permanent_dir, "genotype.gff")
        if not os.path.exists(permanent_dir):
            os.makedirs(permanent_dir)
        shutil.copy(output_path, permanent_file)

        # run the query
        submit_local(permanent_file)
        return s
    server.register_function(submit)

    def submit_local(genotype_file, coverage_file='', trackback_url='',
                     request_token='', reprocess_all=False):
        # create output dir
        input_dir = os.path.dirname(genotype_file)
        output_dir = input_dir + "-out"
        try:
            if not os.path.exists(output_dir):
                os.makedirs(output_dir)
        except:
            print "Unexpected error:", sys.exc_info()[0]

        # cache phenotype/profile data locally if it is a special symlink
        if (os.path.islink(os.path.join(input_dir, "phenotype")) and
            re.match('warehouse://.*', os.readlink(os.path.join(input_dir, "phenotype")))):
            cmd = '''( set -e
cd '%s'
whget phenotype phenotype.$$
mv phenotype phenotype-locator
mv --no-target-directory phenotype.$$ phenotype
) &''' % os.path.dirname(genotype_file)
            subprocess.call(cmd, shell=True)

        # fetch from warehouse if genotype file is special symlink
        fetch_command = "cat"
        if os.path.islink(genotype_file):
            if re.match('warehouse://.*', os.readlink(genotype_file)):
                fetch_command = "whget"

        # letters refer to scripts; numbers refer to outputs
        args = {
            'reprocess_all': reprocess_all,
            'A': os.path.join(script_dir, "gff_twobit_query.py"),
            'B': os.path.join(script_dir, "gff_dbsnp_query.py"),
            'C': os.path.join(script_dir, "gff_nonsynonymous_filter.py"),
            'Z': os.path.join(script_dir, "trait-o-matic-server.py"),
            'in': genotype_file,
            'fetch': fetch_command,
            'reference': REFERENCE_GENOME,
            'url': trackback_url,
            'token': request_token,
            '1': os.path.join(output_dir, "genotype.gff"),
            '2': os.path.join(output_dir, "genotype.dbsnp.gff"),
            'ns_gff': os.path.join(output_dir, "ns.gff"),
            'dbsnp_filters': "snpedia hugenetgwas",
            'ns_filters': "omim hgmd morbid pharmgkb get-evidence",
            'script_dir': script_dir,
            'output_dir': output_dir,
            'lockfile': os.path.join(output_dir, "lock"),
            'logfile': os.path.join(output_dir, "log")
        }
        cmd = '''( flock --nonblock --exclusive 2 || exit
set -x
set -e
cd '%(output_dir)s' || exit
if [ ! -e '%(ns_gff)s' -o ! -e '%(1)s' -o '%(reprocess_all)s' != False ]
then
  %(fetch)s '%(in)s' | gzip -cdf | python '%(A)s' '%(reference)s' /dev/stdin | egrep 'ref_allele [ACGTN]' > '%(1)s'.tmp
  mv '%(1)s'.tmp '%(1)s'
  python '%(B)s' '%(1)s' > '%(2)s'.tmp
  mv '%(2)s'.tmp '%(2)s'
  python '%(C)s' '%(2)s' '%(reference)s' > '%(ns_gff)s'.tmp
  mv '%(ns_gff)s'.tmp '%(ns_gff)s'
fi
python '%(script_dir)s'/gff2json.py '%(ns_gff)s' > ns.json.tmp
mv ns.json.tmp ns.json
python '%(script_dir)s'/json_allele_frequency_query.py ns.json --in-place
jsons=""
for filter in %(dbsnp_filters)s
do
  python '%(script_dir)s'/gff_${filter}_map.py '%(2)s' > ${filter}.json.tmp
  mv ${filter}.json.tmp ${filter}.json
  python '%(script_dir)s'/json_allele_frequency_query.py "$filter.json" --in-place
  jsons="$jsons %(output_dir)s/${filter}.json"
done
for filter in %(ns_filters)s
do
  python '%(script_dir)s'/gff_${filter}_map.py '%(ns_gff)s' > "$filter.json.tmp"
  mv "$filter.json.tmp" "$filter.json"
  python '%(script_dir)s'/json_allele_frequency_query.py "$filter.json" --in-place
  jsons="$jsons %(output_dir)s/${filter}.json"
done
python '%(script_dir)s'/json_to_job_database.py --drop-tables $jsons '%(output_dir)s'/ns.json
touch README
for filter in %(ns_filters)s %(dbsnp_filters)s ns
do
  python '%(Z)s' -t '%(url)s' '%(output_dir)s'/$filter.json out/$filter '%(token)s'
done
python '%(Z)s' -t '%(url)s' '%(output_dir)s'/README out/readme '%(token)s'
mv %(lockfile)s %(logfile)s
) 2>>%(lockfile)s &''' % args
        subprocess.call(cmd, shell=True)
        return output_dir
    server.register_function(submit_local)

    def get_progress(genotype_file):
        output_dir = os.path.dirname(genotype_file) + "-out"
        lockfile = os.path.join(output_dir, 'lock')
        logfile = os.path.join(output_dir, 'log')
        # remove the lockfile if it is stale
        subprocess.call('flock --nonblock --exclusive %(lock)s mv %(lock)s %(log)s 2>/dev/null || true'
                        % {"lock": lockfile, "log": logfile}, shell=True)
        if os.path.exists(lockfile):
            return {"state": "processing"}
        else:
            return {"state": "finished"}
    server.register_function(get_progress)

    def copy_to_warehouse(genotype_file, coverage_file, phenotype_file,
                          trackback_url='', request_token='', recopy=True, tag=False):
        output_dir = os.path.dirname(genotype_file)
        g_locator = _copy_file_to_warehouse(genotype_file, "genotype.gff", tag, "genotype")
        c_locator = _copy_file_to_warehouse(coverage_file, "coverage", tag, "coverage")
        p_locator = _copy_file_to_warehouse(phenotype_file, "profile.json", tag, "profile")
        if (g_locator != None and c_locator != None and p_locator != None):
            return (g_locator, c_locator, p_locator)
        return None
    server.register_function(copy_to_warehouse)

    def _copy_file_to_warehouse(source_file, target_filename=None, tag=False,
                                data_type=None, trackback_url=None, recopy=True,
                                request_token=''):
        if not source_file:
            return ''
        # if file is special symlink, return link target
        if os.path.islink(source_file):
            if re.match('warehouse://.*', os.readlink(source_file)):
                locator = os.readlink(source_file)
                _update_warehouse_name_list(locator, target_filename, tag, data_type)
                return locator
        # if file has already been copied to warehouse, do not recopy
        if not recopy and os.path.islink(source_file + '-locator'):
            locator = os.readlink(source_file + '-locator')
            _update_warehouse_name_list(locator, target_filename, tag, data_type)
            return locator
        # if copying is required, fork a child process and return now
        if os.fork() > 0:
            # wait for intermediate proc to fork & exit
            os.wait()
            # return existing locator if available
            if os.path.islink(source_file + '-locator'):
                return os.readlink(source_file + '-locator')
            return ''
        # double-fork avoids accumulating zombie child processes
        if os.fork() > 0:
            os._exit(0)
        if not target_filename:
            target_filename = os.path.basename(source_file)
        whput = subprocess.Popen(["whput", "--in-manifest",
                                  "--use-filename=%s" % target_filename,
                                  source_file],
                                 stdout=subprocess.PIPE)
        (locator, errors) = whput.communicate()
        ret = whput.returncode
        if ret is None:
            ret = whput.wait()
        if ret == 0:
            locator = 'warehouse:///' + locator.strip() + '/' + target_filename
            try:
                os.symlink(locator, source_file + '-locator.tmp')
                os.rename(source_file + '-locator.tmp', source_file + '-locator')
                _update_warehouse_name_list(locator, target_filename, tag, data_type)
            except OSError:
                print >> sys.stderr, 'Ignoring error creating symlink ' + source_file + '-locator'
            if trackback_url:
                subprocess.call("python '%(Z)s' -t '%(url)s' '%(out)s' '%(source)s' '%(token)s'" % {
                    'Z': os.path.join(script_dir, "trait-o-matic-server.py"),
                    'url': trackback_url,
                    'out': locator,
                    'source': source_file,
                    'token': request_token
                }, shell=True)
            os._exit(0)
        os._exit(1)

    def _update_warehouse_name_list(locator, target_filename, tag, data_type):
        if tag:
            share_name = "/" + os.uname()[1] + "/Trait-o-matic/" + tag + "/" + data_type
            share_target = re.sub("warehouse:///", "", locator)
            old_target = warehouse.name_lookup(share_name)
            whargs = ["wh", "manifest", "name",
                      "name=" + share_name,
                      "newkey=" + share_target]
            if old_target:
                whargs.append("oldkey=" + old_target)
            subprocess.call(whargs)

    # run the server's main loop
    server.serve_forever()
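# The fork logic in _copy_file_to_warehouse above is the classic
# double-fork idiom: the server process waits only for a short-lived
# intermediate child, and the grandchild that does the slow whput upload
# is adopted by init, so it never lingers as a zombie. A distilled sketch
# (illustrative, not part of the server):
import os

def run_detached(task):
    if os.fork() > 0:
        os.wait()       # reap the intermediate child immediately
        return          # parent continues serving requests
    if os.fork() > 0:
        os._exit(0)     # intermediate child exits at once
    task()              # grandchild does the real work, fully detached
    os._exit(0)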
def main():
    # parse options
    option, args = doc_optparse.parse(__doc__)

    # deal with the trackback option
    if option.trackback:
        if len(args) < 4:
            doc_optparse.exit()
        url = args[0]
        path = args[1]
        kind = args[2]
        request_token = args[3]
        params = urllib.urlencode({"path": path,
                                   "kind": kind,
                                   "request_token": request_token})
        trackback(url, params)
        return

    # otherwise, figure out the host and port
    host = option.host or "localhost"
    port = int(option.port or 8080)

    # create server
    server = xrs((host, port))
    server.register_introspection_functions()

    def submit(genotype_file, coverage_file="", username=None, password=None):
        # get genotype file
        r = urllib2.Request(genotype_file)
        if username is not None:
            h = "Basic %s" % base64.encodestring("%s:%s" % (username, password)).strip()
            r.add_header("Authorization", h)
        handle = urllib2.urlopen(r)

        # write it to a temporary location while calculating its hash
        s = hashlib.sha1()
        output_handle, output_path = mkstemp()
        for line in handle:
            os.write(output_handle, line)
            s.update(line)
        os.close(output_handle)

        # now figure out where to store the file permanently
        permanent_dir = os.path.join(UPLOAD_DIR, s.hexdigest())
        permanent_file = os.path.join(permanent_dir, "genotype.gff")
        if not os.path.exists(permanent_dir):
            os.makedirs(permanent_dir)
        shutil.copy(output_path, permanent_file)

        # run the query
        submit_local(permanent_file)
        return s
    server.register_function(submit)

    def submit_local(genotype_file, coverage_file="", trackback_url="", request_token=""):
        # execute script
        script_dir = os.path.dirname(sys.argv[0])

        # create output dir
        output_dir = os.path.dirname(genotype_file) + "-out"
        try:
            if not os.path.exists(output_dir):
                os.makedirs(output_dir)
        except:
            print "Unexpected error:", sys.exc_info()[0]

        # fetch from warehouse if genotype file is special symlink
        fetch_command = "cat"
        if os.path.islink(genotype_file):
            if re.match("warehouse://.*", os.readlink(genotype_file)):
                fetch_command = "whget"

        # letters refer to scripts; numbers refer to outputs
        args = {
            "A": os.path.join(script_dir, "gff_twobit_query.py"),
            "B": os.path.join(script_dir, "gff_dbsnp_query.py"),
            "C": os.path.join(script_dir, "gff_nonsynonymous_filter.py"),
            "D": os.path.join(script_dir, "gff_omim_map.py"),
            "E": os.path.join(script_dir, "gff_hgmd_map.py"),
            "F": os.path.join(script_dir, "gff_morbid_map.py"),
            "G": os.path.join(script_dir, "gff_snpedia_map.py"),
            "pharmgkb_bin": os.path.join(script_dir, "gff_pharmgkb_map.py"),
            "H": os.path.join(script_dir, "json_allele_frequency_query.py"),
            "I": os.path.join(script_dir, "json_to_job_database.py"),
            "Z": os.path.join(script_dir, "server.py"),
            "in": genotype_file,
            "fetch": fetch_command,
            "reference": REFERENCE_GENOME,
            "url": trackback_url,
            "token": request_token,
            "1": os.path.join(output_dir, "genotype.gff"),
            "2": os.path.join(output_dir, "genotype.dbsnp.gff"),
            "3": os.path.join(output_dir, "ns.gff"),
            "4": os.path.join(output_dir, "omim.json"),
            "5": os.path.join(output_dir, "hgmd.json"),
            "6": os.path.join(output_dir, "morbid.json"),
            "7": os.path.join(output_dir, "snpedia.json"),
            "pharmgkb_out": os.path.join(output_dir, "pharmgkb.json"),
            "8": "",
            "0": os.path.join(output_dir, "README"),
        }
        cmd = ("""( %(fetch)s '%(in)s' | python '%(A)s' '%(reference)s' /dev/stdin > '%(1)s'
python '%(B)s' '%(1)s' > '%(2)s'
python '%(C)s' '%(2)s' '%(reference)s' > '%(3)s'
python '%(D)s' '%(3)s' > '%(4)s'
python '%(E)s' '%(3)s' > '%(5)s'
python '%(F)s' '%(3)s' > '%(6)s'
python '%(G)s' '%(2)s' > '%(7)s'
python '%(pharmgkb_bin)s' '%(3)s' > '%(pharmgkb_out)s'
python '%(H)s' '%(4)s' '%(5)s' '%(6)s' '%(7)s' '%(pharmgkb_out)s' --in-place
python '%(I)s' --drop-tables '%(4)s' '%(5)s' '%(6)s' '%(7)s' '%(pharmgkb_out)s'
touch '%(0)s'
python '%(Z)s' -t '%(url)s' '%(4)s' 'out/omim' '%(token)s'
python '%(Z)s' -t '%(url)s' '%(5)s' 'out/hgmd' '%(token)s'
python '%(Z)s' -t '%(url)s' '%(6)s' 'out/morbid' '%(token)s'
python '%(Z)s' -t '%(url)s' '%(7)s' 'out/snpedia' '%(token)s'
python '%(Z)s' -t '%(url)s' '%(pharmgkb_out)s' 'out/pharmgkb' '%(token)s'
python '%(Z)s' -t '%(url)s' '%(0)s' 'out/readme' '%(token)s'
)&""" % args)
        subprocess.call(cmd, shell=True)
        return output_dir
    server.register_function(submit_local)

    def copy_to_warehouse(genotype_file, coverage_file, phenotype_file,
                          trackback_url="", request_token="", recopy=True):
        # execute script
        script_dir = os.path.dirname(sys.argv[0])
        output_dir = os.path.dirname(genotype_file)
        g_locator = _copy_file_to_warehouse(genotype_file, "genotype.gff")
        c_locator = _copy_file_to_warehouse(coverage_file, "coverage")
        p_locator = _copy_file_to_warehouse(phenotype_file, "phenotype.json")
        if g_locator != None and c_locator != None and p_locator != None:
            return (g_locator, c_locator, p_locator)
        return None
    server.register_function(copy_to_warehouse)

    def _copy_file_to_warehouse(source_file, target_filename=None,
                                trackback_url=None, recopy=True, request_token=""):
        if not source_file:
            return ""
        # if file is special symlink, return link target
        if os.path.islink(source_file):
            if re.match("warehouse://.*", os.readlink(source_file)):
                return os.readlink(source_file)
        # if file has already been copied to warehouse, do not recopy
        if not recopy and os.path.islink(source_file + "-locator"):
            return os.readlink(source_file + "-locator")
        # if copying is required, fork a child process and return now
        if os.fork() > 0:
            # wait for intermediate proc to fork & exit
            os.wait()
            # return existing locator if available
            if os.path.islink(source_file + "-locator"):
                return os.readlink(source_file + "-locator")
            return ""
        # double-fork avoids accumulating zombie child processes
        if os.fork() > 0:
            os._exit(0)
        if not target_filename:
            target_filename = os.path.basename(source_file)
        whput = subprocess.Popen(
            ["whput", "--in-manifest", "--use-filename=%s" % target_filename, source_file],
            stdout=subprocess.PIPE
        )
        (locator, errors) = whput.communicate()
        ret = whput.returncode
        if ret is None:
            ret = whput.wait()
        if ret == 0:
            locator = "warehouse:///" + locator.strip() + "/" + target_filename
            try:
                os.symlink(locator, source_file + "-locator.tmp")
                os.rename(source_file + "-locator.tmp", source_file + "-locator")
            except OSError:
                print >> sys.stderr, "Ignoring error creating symlink " + source_file + "-locator"
            if trackback_url:
                # execute script
                script_dir = os.path.dirname(sys.argv[0])
                subprocess.call(
                    "python '%(Z)s' -t '%(url)s' '%(out)s' '%(source)s' '%(token)s'" % {
                        "Z": os.path.join(script_dir, "server.py"),
                        "url": trackback_url,
                        "out": locator,
                        "source": source_file,
                        "token": request_token,
                    },
                    shell=True
                )
            os._exit(0)
        os._exit(1)

    # run the server's main loop
    server.serve_forever()
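# Assuming xrs is an XML-RPC server class (e.g. SimpleXMLRPCServer), the
# registered functions above are callable remotely. An illustrative client
# session against a locally running instance on the default port; the URL
# and input path are examples, not fixed values:
import xmlrpclib

rpc = xmlrpclib.ServerProxy("http://localhost:8080")
output_dir = rpc.submit_local("/path/to/upload/genotype.gff")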
def main():
    # parse options
    option, args = doc_optparse.parse(__doc__)

    # deal with the trackback option
    if option.trackback:
        if len(args) < 4:
            doc_optparse.exit()
        url = args[0]
        path = args[1]
        kind = args[2]
        request_token = args[3]
        params = urllib.urlencode({'path': path,
                                   'kind': kind,
                                   'request_token': request_token})
        trackback(url, params)
        return

    # otherwise, figure out the host and port
    host = option.host or "localhost"
    port = int(option.port or 8080)

    # create server
    server = xrs((host, port))
    server.register_introspection_functions()

    def submit(genotype_file, coverage_file='', username=None, password=None):
        # get genotype file
        r = urllib2.Request(genotype_file)
        if username is not None:
            h = "Basic %s" % base64.encodestring('%s:%s' % (username, password)).strip()
            r.add_header("Authorization", h)
        handle = urllib2.urlopen(r)

        # write it to a temporary location while calculating its hash
        s = hashlib.sha1()
        output_handle, output_path = mkstemp()
        for line in handle:
            os.write(output_handle, line)
            s.update(line)
        os.close(output_handle)

        # now figure out where to store the file permanently
        permanent_dir = os.path.join(UPLOAD_DIR, s.hexdigest())
        permanent_file = os.path.join(permanent_dir, "genotype.gff")
        if not os.path.exists(permanent_dir):
            os.makedirs(permanent_dir)
        shutil.move(output_path, permanent_file)

        # run the query
        submit_local(permanent_file)
        return s
    server.register_function(submit)

    def submit_local(genotype_file, coverage_file='', trackback_url='', request_token=''):
        # execute script
        script_dir = os.path.dirname(sys.argv[0])
        output_dir = os.path.dirname(genotype_file)

        # letters refer to scripts; numbers refer to outputs
        args = {
            'A': os.path.join(script_dir, "gff_twobit_query.py"),
            'B': os.path.join(script_dir, "gff_dbsnp_query.py"),
            'C': os.path.join(script_dir, "gff_nonsynonymous_filter.py"),
            'D': os.path.join(script_dir, "gff_omim_map.py"),
            'E': os.path.join(script_dir, "gff_hgmd_map.py"),
            'F': os.path.join(script_dir, "gff_morbid_map.py"),
            'G': os.path.join(script_dir, "gff_snpedia_map.py"),
            'H': os.path.join(script_dir, "json_allele_frequency_query.py"),
            'I': os.path.join(script_dir, "json_to_job_database.py"),
            'Z': os.path.join(script_dir, "server.py"),
            'in': genotype_file,
            'reference': REFERENCE_GENOME,
            'url': trackback_url,
            'token': request_token,
            '1': os.path.join(output_dir, "genotype.gff"),
            '2': os.path.join(output_dir, "genotype.dbsnp.gff"),
            '3': os.path.join(output_dir, "ns.gff"),
            '4': os.path.join(output_dir, "omim.json"),
            '5': os.path.join(output_dir, "hgmd.json"),
            '6': os.path.join(output_dir, "morbid.json"),
            '7': os.path.join(output_dir, "snpedia.json"),
            '8': "",
            '0': os.path.join(output_dir, "README")
        }
        cmd = '''( python '%(A)s' '%(in)s' '%(reference)s' > '%(1)s'
python '%(B)s' '%(1)s' > '%(2)s'
python '%(C)s' '%(2)s' '%(reference)s' > '%(3)s'
python '%(D)s' '%(3)s' > '%(4)s'
python '%(E)s' '%(3)s' > '%(5)s'
python '%(F)s' '%(3)s' > '%(6)s'
python '%(G)s' '%(2)s' > '%(7)s'
python '%(H)s' '%(4)s' '%(5)s' '%(6)s' '%(7)s' --in-place
python '%(I)s' '%(4)s' '%(5)s' '%(6)s' '%(7)s'
touch '%(0)s'
python '%(Z)s' -t '%(url)s' '%(4)s' 'out/omim' '%(token)s'
python '%(Z)s' -t '%(url)s' '%(5)s' 'out/hgmd' '%(token)s'
python '%(Z)s' -t '%(url)s' '%(6)s' 'out/morbid' '%(token)s'
python '%(Z)s' -t '%(url)s' '%(7)s' 'out/snpedia' '%(token)s'
python '%(Z)s' -t '%(url)s' '%(0)s' 'out/readme' '%(token)s'
)&''' % args
        subprocess.call(cmd, shell=True)
        return output_dir
    server.register_function(submit_local)

    # run the server's main loop
    server.serve_forever()
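# trackback() is called by the server mains above but not defined in this
# section. A plausible sketch, inferred from usage (a URL plus urlencoded
# params): POST the parameters back to the waiting web application.
import urllib

def trackback(url, params):
    handle = urllib.urlopen(url, params)  # supplying data makes this a POST
    try:
        return handle.read()
    finally:
        handle.close()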
def main():
    # parse options
    option, args = doc_optparse.parse(__doc__)

    if len(args) < 2:
        doc_optparse.exit()

    flank = int(option.flank or 0)

    # try opening the file both ways, in case the arguments got confused
    try:
        gff_file = gff.input(args[1])
        twobit_file = twobit.input(args[0])
    except Exception:
        gff_file = gff.input(args[0])
        twobit_file = twobit.input(args[1])

    # initialize a set of variables to keep track of uniqueness, if we need them
    if option.unique:
        previous_record = None
        previous_ref_seq = None
        repetition_count = 1

    for record in gff_file:
        # if we're using the unique option, output the previous record only when
        # we're sure we've seen all repetitions of it
        if option.unique and record == previous_record:
            repetition_count += 1
            continue
        elif option.unique:
            if previous_record:
                previous_record.attributes["repetition_count"] = str(repetition_count)
                print FastaRecord(str(previous_record).replace("\t", "|"), previous_ref_seq)
            repetition_count = 1
            previous_record = record

        if record.seqname.startswith("chr"):
            chr = record.seqname
        else:
            chr = "chr" + record.seqname

        ref_seq = twobit_file[chr][(record.start - 1):record.end]

        if flank != 0:
            # calculate the flanks (these variables are 0-based)
            left_flank_start = record.start - flank - 1
            left_flank_end = record.start - 1
            if left_flank_start < 0:
                left_flank_start = 0
            right_flank_start = record.end
            right_flank_end = record.end + flank
            # now find them
            left_flank_seq = twobit_file[chr][left_flank_start:left_flank_end]
            right_flank_seq = twobit_file[chr][right_flank_start:right_flank_end]
            ref_seq = left_flank_seq + "\n\n" + ref_seq + "\n\n" + right_flank_seq

        if option.strand and record.strand == "-":
            ref_seq = reverse_complement(ref_seq)

        # we don't output the current record if we're using the unique option
        if option.unique:
            previous_ref_seq = ref_seq
        else:
            print FastaRecord(str(record).replace("\t", "|"), ref_seq)

    # we'll have one last record yet to output if we used the unique option
    if option.unique:
        previous_record.attributes["repetition_count"] = str(repetition_count)
        print FastaRecord(str(previous_record).replace("\t", "|"), previous_ref_seq)
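# reverse_complement() is used above but not defined in this section. A
# minimal sketch for upper- and lowercase DNA; note the real helper would
# also need to cope with the blank lines inserted around flanking sequence,
# which this translation table leaves in place but the reversal reorders.
import string

_COMPLEMENT = string.maketrans("ACGTNacgtn", "TGCANtgcan")

def reverse_complement(seq):
    return seq.translate(_COMPLEMENT)[::-1]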