예제 #1
0
def pipeline_validation_experiment(location_file, true_type, true_location, pedigree, debug=False, remove_partial_calls=False):
    '''Impute the genotypes at a list of locations, using externally loaded ''true'' genotypes.

    Genotypes are extracted from ''location_file'', wrapped together with the pedigree, and used
    to load the true genotypes from ''true_location''. A problem is then built from the pedigree
    and the true genotypes and handed to impute_problem().

    Parameters:
        location_file - passed to extract_genotypes().
        true_type - format of the true genotype data. Only 'iplex' is supported.
        true_location - passed to im.imputation.reader.iplex_to_genotype().
        pedigree - passed to both ImputationSet and Problem.
        debug, remove_partial_calls - forwarded unchanged to impute_problem().

    Returns the two values returned by impute_problem().

    Raises ValueError if ''true_type'' is not a supported format.
    '''
    # Validate the cheap argument first so that an unsupported format fails before any data is
    # loaded. The message quotes the offending value (the previous doubled-quote spelling was
    # implicit literal concatenation and silently dropped the quotes).
    if true_type != 'iplex':
        raise ValueError("Unsupported true genotype format '%s'" % (true_type,))

    g = extract_genotypes(location_file)
    t = ImputationSet(pedigree, g)
    # Example true_location: os.environ['OBER'] + '/data/impute/rare/to_livne_20121205'
    true_genotype = im.imputation.reader.iplex_to_genotype(true_location, t)
    problem = Problem(pedigree, true_genotype)
    p, t = impute_problem(problem, debug=debug, remove_partial_calls=remove_partial_calls)
    return p, t
예제 #2
0
def pipeline_validation_experiment(
    location_file, true_type, true_location, pedigree, debug=False, remove_partial_calls=False
):
    """Impute the genotypes at a list of locations, using externally loaded ''true'' genotypes.

    Genotypes are extracted from ''location_file'', wrapped together with the pedigree, and used
    to load the true genotypes from ''true_location''. A problem is then built from the pedigree
    and the true genotypes and handed to impute_problem().

    Parameters:
        location_file - passed to extract_genotypes().
        true_type - format of the true genotype data. Only "iplex" is supported.
        true_location - passed to im.imputation.reader.iplex_to_genotype().
        pedigree - passed to both ImputationSet and Problem.
        debug, remove_partial_calls - forwarded unchanged to impute_problem().

    Returns the two values returned by impute_problem().

    Raises ValueError if ''true_type'' is not a supported format.
    """
    # Validate the cheap argument first so that an unsupported format fails before any data is
    # loaded. The message quotes the offending value (the previous adjacent-literal spelling
    # concatenated to a plain, unquoted %s).
    if true_type != "iplex":
        raise ValueError("Unsupported true genotype format '%s'" % (true_type,))

    g = extract_genotypes(location_file)
    t = ImputationSet(pedigree, g)
    # Example true_location: os.environ['OBER'] + '/data/impute/rare/to_livne_20121205'
    true_genotype = im.imputation.reader.iplex_to_genotype(true_location, t)
    problem = Problem(pedigree, true_genotype)
    p, t = impute_problem(problem, debug=debug, remove_partial_calls=remove_partial_calls)
    return p, t
예제 #3
0
                      help='Print debugging information')
    parser.add_option('-n', '--no-swap-major-minor'        , action='store_true'  , dest='allele_swap', default=False,
                      help='Do not swap major/minor allele encoding based on the data')  # Note: reads the no-allele-swap flag and then negates it below!
    parser.add_option('-k', '--custom-identifier'        , action='store_true'  , dest='custom_id', default=False,
                      help='Assign custom identifiers to ALL snps, not just those with blank identifiers in the data')
    options, args = parser.parse_args(sys.argv[1:])
    options.allele_swap = not options.allele_swap 
    if not options.format in ['plink', 'npz']:
        print 'Unreocgnized format ''%s''. Supported formats: plink, npz' % (options.format,)
        print usage
    if len(args) != 2:
        print usage
        sys.exit(1)
    try:
        input_file = sys.stdin if args[0] == '-' else open(args[0], 'rb')
        g = extract_genotypes(input_file, index_file=options.index_file, var_file_prefix=options.var_file_prefix,
                              allele_swap=options.allele_swap, custom_id=options.custom_id, debug=options.debug)
        out = args[1]
        if options.format == 'npz':
            im.io_genotype.write(options.format, g, out + '.npz')
        elif options.format == 'plink':
            # Does not yet include the genetic map . TODO: save it to a separate PLINK file if a dedicated
            # PLINK format exists for this data
            im.io_genotype.write(options.format, g, open(out + '.tped', 'wb'),
                                 sample_id_out=out + '.tfam', recode_cgi=options.recode_cgi)
        else:
            raise ValueError('Unsupported output format ''%s''' % (options.format,))
    except:
        traceback.print_exc(file=sys.stdout)
        sys.exit(util.EXIT_FAILURE)
예제 #4
0
     'Assign custom identifiers to ALL snps, not just those with blank identifiers in the data'
 )
 options, args = parser.parse_args(sys.argv[1:])
 options.allele_swap = not options.allele_swap
 if not options.format in ['plink', 'npz']:
     print 'Unreocgnized format ' '%s' '. Supported formats: plink, npz' % (
         options.format, )
     print usage
 if len(args) != 2:
     print usage
     sys.exit(1)
 try:
     input_file = sys.stdin if args[0] == '-' else open(args[0], 'rb')
     g = extract_genotypes(input_file,
                           index_file=options.index_file,
                           var_file_prefix=options.var_file_prefix,
                           allele_swap=options.allele_swap,
                           custom_id=options.custom_id,
                           debug=options.debug)
     out = args[1]
     if options.format == 'npz':
         im.io_genotype.write(options.format, g, out + '.npz')
     elif options.format == 'plink':
         # Does not yet include the genetic map . TODO: save it to a separate PLINK file if a dedicated
         # PLINK format exists for this data
         im.io_genotype.write(options.format,
                              g,
                              open(out + '.tped', 'wb'),
                              sample_id_out=out + '.tfam',
                              recode_cgi=options.recode_cgi)
     else:
         raise ValueError('Unsupported output format '