def pipeline_validation_experiment(location_file, true_type, true_location, pedigree,
                                   debug=False, remove_partial_calls=False):
    '''Impute genotypes at a list of locations, using externally loaded "true" genotypes.

    Genotypes are extracted from ``location_file`` and wrapped, together with ``pedigree``,
    in an ``ImputationSet``. The true genotypes are then read from ``true_location`` in the
    format named by ``true_type``, a ``Problem`` is built from them and handed to
    ``impute_problem``.

    Parameters:
        location_file -- passed as-is to ``extract_genotypes``.
        true_type -- name of the true-genotype format. Only 'iplex' is supported.
        true_location -- source of the true genotypes; passed to ``iplex_to_genotype``.
        pedigree -- used to build both the ``ImputationSet`` and the ``Problem``.
        debug -- forwarded to ``impute_problem``.
        remove_partial_calls -- forwarded to ``impute_problem``.

    Returns:
        The ``(p, t)`` pair returned by ``impute_problem``.

    Raises:
        ValueError -- if ``true_type`` is not a supported format.
    '''
    # Validate the format first so an unsupported type is rejected before any
    # genotype data is loaded.
    if true_type != 'iplex':
        # A doubled quote ('') is not an escape in Python; it silently concatenates
        # adjacent literals, so the value must be quoted explicitly.
        raise ValueError("Unsupported true genotype format '%s'" % (true_type,))

    g = extract_genotypes(location_file)
    t = ImputationSet(pedigree, g)
    # Example location seen in earlier revisions:
    # os.environ['OBER'] + '/data/impute/rare/to_livne_20121205'
    true_genotype = im.imputation.reader.iplex_to_genotype(true_location, t)

    problem = Problem(pedigree, true_genotype)
    # ``t`` is deliberately rebound: from here on it is the second value returned
    # by impute_problem, not the ImputationSet built above.
    p, t = impute_problem(problem, debug=debug, remove_partial_calls=remove_partial_calls)
    return p, t
def pipeline_validation_experiment(
    location_file, true_type, true_location, pedigree, debug=False, remove_partial_calls=False
):
    """Impute genotypes at a list of locations, using externally loaded "true" genotypes.

    Genotypes are extracted from ``location_file`` and wrapped, together with ``pedigree``,
    in an ``ImputationSet``. The true genotypes are then read from ``true_location`` in the
    format named by ``true_type``, a ``Problem`` is built from them and handed to
    ``impute_problem``.

    Parameters:
        location_file -- passed as-is to ``extract_genotypes``.
        true_type -- name of the true-genotype format. Only "iplex" is supported.
        true_location -- source of the true genotypes; passed to ``iplex_to_genotype``.
        pedigree -- used to build both the ``ImputationSet`` and the ``Problem``.
        debug -- forwarded to ``impute_problem``.
        remove_partial_calls -- forwarded to ``impute_problem``.

    Returns:
        The ``(p, t)`` pair returned by ``impute_problem``.

    Raises:
        ValueError -- if ``true_type`` is not a supported format.
    """
    # Validate the format first so an unsupported type is rejected before any
    # genotype data is loaded.
    if true_type != "iplex":
        # Adjacent string literals are concatenated, so the former
        # "... " "%s" "" spelling dropped the quotes around the value.
        raise ValueError("Unsupported true genotype format '%s'" % (true_type,))

    g = extract_genotypes(location_file)
    t = ImputationSet(pedigree, g)
    # Example location seen in earlier revisions:
    # os.environ['OBER'] + '/data/impute/rare/to_livne_20121205'
    true_genotype = im.imputation.reader.iplex_to_genotype(true_location, t)

    problem = Problem(pedigree, true_genotype)
    # ``t`` is deliberately rebound: from here on it is the second value returned
    # by impute_problem, not the ImputationSet built above.
    p, t = impute_problem(problem, debug=debug, remove_partial_calls=remove_partial_calls)
    return p, t
help='Print debugging information') parser.add_option('-n', '--no-swap-major-minor' , action='store_true' , dest='allele_swap', default=False, help='Do not swap major/minor allele encoding based on the data') # Note: reads the no-allele-swap flag and then negates it below! parser.add_option('-k', '--custom-identifier' , action='store_true' , dest='custom_id', default=False, help='Assign custom identifiers to ALL snps, not just those with blank identifiers in the data') options, args = parser.parse_args(sys.argv[1:]) options.allele_swap = not options.allele_swap if not options.format in ['plink', 'npz']: print 'Unreocgnized format ''%s''. Supported formats: plink, npz' % (options.format,) print usage if len(args) != 2: print usage sys.exit(1) try: input_file = sys.stdin if args[0] == '-' else open(args[0], 'rb') g = extract_genotypes(input_file, index_file=options.index_file, var_file_prefix=options.var_file_prefix, allele_swap=options.allele_swap, custom_id=options.custom_id, debug=options.debug) out = args[1] if options.format == 'npz': im.io_genotype.write(options.format, g, out + '.npz') elif options.format == 'plink': # Does not yet include the genetic map . TODO: save it to a separate PLINK file if a dedicated # PLINK format exists for this data im.io_genotype.write(options.format, g, open(out + '.tped', 'wb'), sample_id_out=out + '.tfam', recode_cgi=options.recode_cgi) else: raise ValueError('Unsupported output format ''%s''' % (options.format,)) except: traceback.print_exc(file=sys.stdout) sys.exit(util.EXIT_FAILURE)
'Assign custom identifiers to ALL snps, not just those with blank identifiers in the data' ) options, args = parser.parse_args(sys.argv[1:]) options.allele_swap = not options.allele_swap if not options.format in ['plink', 'npz']: print 'Unreocgnized format ' '%s' '. Supported formats: plink, npz' % ( options.format, ) print usage if len(args) != 2: print usage sys.exit(1) try: input_file = sys.stdin if args[0] == '-' else open(args[0], 'rb') g = extract_genotypes(input_file, index_file=options.index_file, var_file_prefix=options.var_file_prefix, allele_swap=options.allele_swap, custom_id=options.custom_id, debug=options.debug) out = args[1] if options.format == 'npz': im.io_genotype.write(options.format, g, out + '.npz') elif options.format == 'plink': # Does not yet include the genetic map . TODO: save it to a separate PLINK file if a dedicated # PLINK format exists for this data im.io_genotype.write(options.format, g, open(out + '.tped', 'wb'), sample_id_out=out + '.tfam', recode_cgi=options.recode_cgi) else: raise ValueError('Unsupported output format '