def main():
    """Report coldspot regions for the SNPs found in several SNP files.

    Every positional command-line argument is treated as a SNP file and
    at least two of them are required. The SNPs of all files are pooled
    and handed to find_coldspot_regions() together with the sequence
    length and the (optional) list of positions to exclude.

    Exits through parser.error() when fewer than two SNP files are given
    and with status 1 when the sequence length option is missing.
    """
    parser = cmdline_parser()
    (opts, args) = parser.parse_args()

    snp_files = args
    if len(snp_files) < 2:
        # The check accepts exactly two files, so the message must say
        # "at least two". parser.error() prints the usage text and
        # terminates the program itself; no explicit exit is needed.
        parser.error("Need at least two SNP files.")

    # Debug is applied last so that it wins when both flags are given.
    if opts.verbose:
        LOG.setLevel(logging.INFO)
    if opts.debug:
        LOG.setLevel(logging.DEBUG)

    for (required_opt, opt_descr) in [
            (opts.seq_len, "sequence length"),
    ]:
        if not required_opt:
            LOG.fatal("Missing %s argument" % opt_descr)
            sys.exit(1)

    LOG.info("Init sig-level=%f" % (SIG_LEVEL))

    excl_pos = []
    if opts.fexclude:
        excl_pos = read_exclude_pos_file(opts.fexclude)
        LOG.info("Excluding %d positions" % len(excl_pos))

    # Pool the SNPs of all input files, logging the per-file count.
    snps = []
    for snp_file in snp_files:
        more_snps = snp.parse_snp_file(snp_file)
        snps.extend(more_snps)
        LOG.info("Parsed %d SNPs from %s" % (len(more_snps), snp_file))

    # Header lines. A single parenthesised argument prints identically
    # under the Python 2 print statement and the Python 3 print function.
    print("#coldspots")
    print("#excluding positions listed in %s" % opts.fexclude)
    print("#considering snvs listed in %s" % (', '.join(snp_files)))

    find_coldspot_regions(snps, opts.seq_len, excl_pos)
def main():
    """Entry point: scan the SNPs of one file for hotspot windows.

    Reads the command line, rejects any positional argument, checks that
    window size, step size, SNP file and sequence length were supplied,
    optionally loads positions to exclude, parses the SNP file and passes
    everything on to find_hotspot_windows().
    """
    opt_parser = cmdline_parser()
    options, leftover = opt_parser.parse_args()

    # This tool takes options only; anything positional is an error.
    if leftover:
        opt_parser.error(
            "Unrecognized arguments found: %s." % (' '.join(leftover)))
        sys.exit(1)

    # Applied in this order so that debug overrides verbose.
    for enabled, level in ((options.verbose, logging.INFO),
                           (options.debug, logging.DEBUG)):
        if enabled:
            LOG.setLevel(level)

    mandatory = (
        (options.win_size, "window size"),
        (options.step_size, "step size"),
        (options.snp_file, "SNP file"),
        (options.seq_len, "sequence length"),
    )
    # Only the first missing option is reported before exiting.
    absent = [label for (value, label) in mandatory if not value]
    if absent:
        LOG.fatal("Missing %s argument" % absent[0])
        sys.exit(1)

    skipped_positions = []
    if options.fexclude:
        skipped_positions = read_exclude_pos_file(options.fexclude)
        LOG.info("Excluding %d positions" % len(skipped_positions))

    parsed_snps = snp.parse_snp_file(options.snp_file)
    LOG.info("Parsed %d SNPs from %s" % (len(parsed_snps), options.snp_file))

    find_hotspot_windows(parsed_snps, options.seq_len, options.win_size,
                         options.step_size, skipped_positions)
def main():
    """Cluster variants by their frequency confidence intervals.

    Variants are read from args.var_file ('-' means stdin for the VCF
    path), wrapped in MetaVar objects and sorted by descending frequency.
    The most frequent variant seeds the first cluster; each following
    variant joins the current cluster while its max_ci is above the
    cluster's lower bound (the seed's min_ci), otherwise it seeds a new
    cluster. One line per cluster is written to args.cluster_file
    ('-' means stdout).

    Exits with status 1 if the input file does not exist, the output file
    already exists or the input cannot be parsed; exits with status 0
    after reporting that there are no variants to cluster.
    """
    parser = cmdline_parser()
    args = parser.parse_args()
    # FIXME catch unrecognized args (not just len(args))

    # Debug is applied last so that it wins when both flags are given.
    if args.verbose:
        LOG.setLevel(logging.INFO)
    if args.debug:
        LOG.setLevel(logging.DEBUG)

    for (in_file, descr) in [(args.var_file, "variant file")]:
        if not in_file:
            # parser.error() prints the usage text and terminates the
            # program itself; no explicit exit is needed afterwards.
            parser.error("%s input file argument missing." % descr)
        if in_file != "-" and not os.path.exists(in_file):
            sys.stderr.write("file '%s' does not exist.\n" % in_file)
            sys.exit(1)

    for (out_file, descr) in [(args.cluster_file, "cluster output file")]:
        if not out_file:
            parser.error("%s output file argument missing." % descr)
        if out_file != "-" and os.path.exists(out_file):
            sys.stderr.write("Cowardly refusing to overwrite existing"
                             " output file '%s'.\n" % out_file)
            sys.exit(1)

    # A lot of code for supporting legacy SNP format.
    #
    # FIXME this and MetaVar() should use vcf by default and just
    # convert snp to vcf
    #
    is_vcf = False
    if HAVE_VCF_MODULE:
        if args.var_file == '-':
            vcf_fh = sys.stdin
        else:
            vcf_fh = open(args.var_file)
            # FIXME gzip support
        # Parsing errors still propagate to the caller, but the input
        # file is now closed on the way out instead of being leaked.
        try:
            vcf_reader = vcf.VCFReader(vcf_fh)
            var_list = [MetaVar(vcf_var=r) for r in vcf_reader]
            is_vcf = True
        finally:
            if vcf_fh is not sys.stdin:
                vcf_fh.close()

    # NOTE: because VCF parsing errors are not swallowed above, this
    # legacy path is only reached when the vcf module is unavailable.
    is_snp = False
    if not is_vcf and HAVE_SNP_MODULE:
        try:
            var_list = [MetaVar(snp_var=s)
                        for s in snp.parse_snp_file(args.var_file)]
            is_snp = True
        except IndexError:
            # Not parseable as SNP format; reported just below.
            pass

    if not is_snp and not is_vcf:
        LOG.error("Can't parse %s. Tried the following formats: %s" % (
            args.var_file, ', '.join(SUPPORTED_FORMATS)))
        sys.exit(1)

    LOG.info("Parsed %d SNPs from %s" % (len(var_list), args.var_file))

    # Most frequent variant first: it becomes the seed of cluster 1.
    var_list = sorted(var_list, key=lambda x: x.freq, reverse=True)

    if args.cluster_file == '-':
        fh_out = sys.stdout
    else:
        fh_out = open(args.cluster_file, 'w')

    if not var_list:
        fh_out.write("No SNPs <-> no clusters!\n")
        if fh_out is not sys.stdout:
            # Repeat the message on stdout when the report went to a file.
            print("No SNPs <-> no clusters!")
            fh_out.close()
        sys.exit(0)

    # Each cluster records its members and the CI bounds of its seed;
    # the bounds are not widened when further members are added.
    seed = var_list[0]
    clusters = [{'members': ["%s" % (seed.repr)],
                 'min': seed.min_ci,
                 'max': seed.max_ci}]
    for var in var_list[1:]:
        current = clusters[-1]
        LOG.debug("checking %s %f: max_ci %f vvar. clu_min %f" % (
            var.repr, var.freq, var.max_ci, current['min']))
        if var.max_ci > current['min']:
            current['members'].append("%s" % (var.repr))
        else:
            clusters.append({'members': ["%s" % (var.repr)],
                             'min': var.min_ci,
                             'max': var.max_ci})

    for (clu_idx, clu) in enumerate(clusters, 1):
        fh_out.write("cluster %d (freq. range: %f - %f): %s\n" % (
            clu_idx, clu['min'], clu['max'], ', '.join(clu['members'])))

    if fh_out is not sys.stdout:
        fh_out.close()
    # NOTE(review): the summary is printed for file and stdout output
    # alike -- confirm that is intended when clusters go to stdout.
    print("%d clusters found (written to %s)" % (len(clusters), fh_out.name))
def main():
    """Cluster variants by their frequency confidence intervals.

    Variants are read from args.var_file ('-' means stdin for the VCF
    path), wrapped in MetaVar objects and sorted by descending frequency.
    The most frequent variant seeds the first cluster; each following
    variant joins the current cluster while its max_ci is above the
    cluster's lower bound (the seed's min_ci), otherwise it seeds a new
    cluster. One line per cluster is written to args.cluster_file
    ('-' means stdout).

    Exits with status 1 if the input file does not exist, the output file
    already exists or the input cannot be parsed; exits with status 0
    after reporting that there are no variants to cluster.
    """
    parser = cmdline_parser()
    args = parser.parse_args()
    # FIXME catch unrecognized args (not just len(args))

    # Debug is applied last so that it wins when both flags are given.
    if args.verbose:
        LOG.setLevel(logging.INFO)
    if args.debug:
        LOG.setLevel(logging.DEBUG)

    for (in_file, descr) in [(args.var_file, "variant file")]:
        if not in_file:
            # parser.error() prints the usage text and terminates the
            # program itself; no explicit exit is needed afterwards.
            parser.error("%s input file argument missing." % descr)
        if in_file != "-" and not os.path.exists(in_file):
            sys.stderr.write("file '%s' does not exist.\n" % in_file)
            sys.exit(1)

    for (out_file, descr) in [(args.cluster_file, "cluster output file")]:
        if not out_file:
            parser.error("%s output file argument missing." % descr)
        if out_file != "-" and os.path.exists(out_file):
            sys.stderr.write("Cowardly refusing to overwrite existing"
                             " output file '%s'.\n" % out_file)
            sys.exit(1)

    # A lot of code for supporting legacy SNP format.
    #
    # FIXME this and MetaVar() should use vcf by default and just
    # convert snp to vcf
    #
    is_vcf = False
    if HAVE_VCF_MODULE:
        if args.var_file == '-':
            vcf_fh = sys.stdin
        else:
            vcf_fh = open(args.var_file)
            # FIXME gzip support
        # Parsing errors still propagate to the caller, but the input
        # file is now closed on the way out instead of being leaked.
        try:
            vcf_reader = vcf.VCFReader(vcf_fh)
            var_list = [MetaVar(vcf_var=r) for r in vcf_reader]
            is_vcf = True
        finally:
            if vcf_fh is not sys.stdin:
                vcf_fh.close()

    # NOTE: because VCF parsing errors are not swallowed above, this
    # legacy path is only reached when the vcf module is unavailable.
    is_snp = False
    if not is_vcf and HAVE_SNP_MODULE:
        try:
            var_list = [
                MetaVar(snp_var=s)
                for s in snp.parse_snp_file(args.var_file)
            ]
            is_snp = True
        except IndexError:
            # Not parseable as SNP format; reported just below.
            pass

    if not is_snp and not is_vcf:
        LOG.error("Can't parse %s. Tried the following formats: %s" %
                  (args.var_file, ', '.join(SUPPORTED_FORMATS)))
        sys.exit(1)

    LOG.info("Parsed %d SNPs from %s" % (len(var_list), args.var_file))

    # Most frequent variant first: it becomes the seed of cluster 1.
    var_list = sorted(var_list, key=lambda x: x.freq, reverse=True)

    if args.cluster_file == '-':
        fh_out = sys.stdout
    else:
        fh_out = open(args.cluster_file, 'w')

    if not var_list:
        fh_out.write("No SNPs <-> no clusters!\n")
        if fh_out is not sys.stdout:
            # Repeat the message on stdout when the report went to a file.
            print("No SNPs <-> no clusters!")
            fh_out.close()
        sys.exit(0)

    # Each cluster records its members and the CI bounds of its seed;
    # the bounds are not widened when further members are added.
    seed = var_list[0]
    clusters = [{
        'members': ["%s" % (seed.repr)],
        'min': seed.min_ci,
        'max': seed.max_ci,
    }]
    for var in var_list[1:]:
        current = clusters[-1]
        LOG.debug("checking %s %f: max_ci %f vvar. clu_min %f" %
                  (var.repr, var.freq, var.max_ci, current['min']))
        if var.max_ci > current['min']:
            current['members'].append("%s" % (var.repr))
        else:
            clusters.append({
                'members': ["%s" % (var.repr)],
                'min': var.min_ci,
                'max': var.max_ci,
            })

    for (clu_idx, clu) in enumerate(clusters, 1):
        fh_out.write("cluster %d (freq. range: %f - %f): %s\n" %
                     (clu_idx, clu['min'], clu['max'],
                      ', '.join(clu['members'])))

    if fh_out is not sys.stdout:
        fh_out.close()
    # NOTE(review): the summary is printed for file and stdout output
    # alike -- confirm that is intended when clusters go to stdout.
    print("%d clusters found (written to %s)" % (len(clusters), fh_out.name))