Ejemplo n.º 1
0
    def __init__(self, problem, fraction=None, test_index=None):
        '''Initialize an experiment to be run on a problem, clearing out 'fraction' of the data. If test_index
        is specified, these specific test indices are used; otherwise a random fraction is generated.
        
        If test_index = 'hap', data is read from problem.h (haplotype array). The entire array
        is considered as a test array, but nothing is zeroed out. Useful for phasing result stats.'''
        # Create a working copy of the problem. Only the data is copied.
        if not (fraction is not None) ^ (test_index is not None):
            raise ValueError('Must specify fraction or test_index')
        self.problem = Problem(problem.pedigree, problem.genotype.copy())
        self.h = self.problem.h

        # Create test set; save original genotypes in g_orig
        if test_index is None:
            self.fraction = fraction
            self.g_orig, i = clear_random_portion(self.problem.genotype.data,
                                                  fraction)
        elif test_index == 'hap':
            # Don't clear anything; call everything a test index.
            h = problem.h
            i = tuple(
                util.flattened_meshgrid(range(h.shape[0]), range(h.shape[1])))
            self.g_orig = problem.g
            self.h = h
            self.fraction = 1.0
        else:
            self.g_orig, i = clear_index(self.problem.g, test_index)
            self.fraction = (1.0 * i[0].size) / (self.h.shape[0] *
                                                 self.h.shape[1])
        self.num_tests = i[0].size
        self.test_index = i
        self.r_orig = recode.recode_single_genotype(self.g_orig)
        self.fill = self.problem.fill_fraction()[:, SAMPLE]
        self.__recode_single_genotype = None
Ejemplo n.º 2
0
 def __init__(self, problem, fraction=None, test_index=None):
     '''Initialize an experiment to be run on a problem, clearing out 'fraction' of the data. If test_index
     is specified, these specific test indices are used; otherwise a random fraction is generated.
     
     If test_index = 'hap', data is read from problem.h (haplotype array). The entire array
     is considered as a test array, but nothing is zeroed out. Useful for phasing result stats.'''
     # Create a working copy of the problem. Only the data is copied.
     if not (fraction is not None) ^ (test_index is not None):
         raise ValueError('Must specify fraction or test_index')
     self.problem = Problem(problem.pedigree, problem.genotype.copy())
     self.h = self.problem.h
     
     # Create test set; save original genotypes in g_orig
     if test_index is None:
         self.fraction = fraction
         self.g_orig, i = clear_random_portion(self.problem.genotype.data, fraction)
     elif test_index == 'hap':
         # Don't clear anything; call everything a test index.
         h = problem.h
         i = tuple(util.flattened_meshgrid(range(h.shape[0]), range(h.shape[1])))
         self.g_orig = problem.g
         self.h = h
         self.fraction = 1.0
     else:
         self.g_orig, i = clear_index(self.problem.g, test_index)
         self.fraction = (1.0 * i[0].size) / (self.h.shape[0] * self.h.shape[1])
     self.num_tests = i[0].size
     self.test_index = i
     self.r_orig = recode.recode_single_genotype(self.g_orig)
     self.fill = self.problem.fill_fraction()[:, SAMPLE]
     self.__recode_single_genotype = None
Ejemplo n.º 3
0
 def problem_hut():
     '''Return the hutterites Problem, loading it on first use.

     The loaded Problem is stored in Templates.PROBLEM_HUT and handed back by later
     calls, since the data set is large.'''
     if Templates.PROBLEM_HUT:
         return Templates.PROBLEM_HUT

     # Cache slot is empty (falsy): build the problem from the pedigree and PLINK genotypes.
     hut_pedigree = Templates.pedigree_hut()
     hut_genotype = io_genotype.read('plink', 'genotype', prefix=GENOTYPE_SAMPLE, load_ids=False)
     Templates.PROBLEM_HUT = Problem(hut_pedigree, hut_genotype)
     return Templates.PROBLEM_HUT
Ejemplo n.º 4
0
def pipeline_validation_experiment(location_file, true_type, true_location, pedigree, debug=False, remove_partial_calls=False):
    '''Load the "true" genotypes from an external source, impute them and return the result.

    Parameters:
        location_file        - passed to extract_genotypes() to obtain the genotypes to impute.
        true_type            - format of the true genotype data; only 'iplex' is supported.
        true_location        - location of the true genotype data, passed to the iplex reader.
        pedigree             - pedigree used for both the ImputationSet and the Problem.
        debug                - forwarded to impute_problem().
        remove_partial_calls - forwarded to impute_problem().

    Returns the pair (p, t) produced by impute_problem().

    Raises ValueError if true_type is not a supported format.'''
    # Reject unsupported formats up front, before any data is loaded.
    if true_type != 'iplex':
        raise ValueError("Unsupported true genotype format '%s'" % (true_type,))

    g = extract_genotypes(location_file)
    t = ImputationSet(pedigree, g)
    true_genotype = im.imputation.reader.iplex_to_genotype(true_location, t)
    problem = Problem(pedigree, true_genotype)
    # Note: t is rebound here to the second value returned by impute_problem().
    p, t = impute_problem(problem, debug=debug, remove_partial_calls=remove_partial_calls)
    return p, t
Ejemplo n.º 5
0
 def setUp(self):
     '''Prepare the trio test problem, the phaser under test and the expected solution.'''
     unittest.TestCase.setUp(self)

     # A pedigree is loaded together with a genotype set so that its sample IDs are
     # recoded to consecutive numbers, which is easier for phasers to access.
     trio_prefix = itu.GENOTYPE_TRIO
     self.problem = io.read_plink(prefix=trio_prefix, haplotype=None, pedigree=trio_prefix + '.tfam')
     self.phaser = trivial_phaser()

     # Expected results: same pedigree, genotypes read from the solution data set.
     trio_pedigree = self.problem.pedigree
     expected_genotype = io_genotype.read('plink', 'genotype', prefix=itu.GENOTYPE_TRIO_SOLUTION)
     self.solution = Problem(trio_pedigree, expected_genotype)
Ejemplo n.º 6
0
Archivo: io.py Proyecto: orenlivne/ober
def read_npz(in_file):
    '''Read a problem from an NPZ file and return it as a Problem object.

    in_file may be a file name or an open file descriptor. The archive is expected to hold
    the pedigree_*, genotype_*, haplotype_*, error, info, frames and lam entries read below;
    genotype_map, haplotype_poo_phase and haplotype_hap_type are optional.

    The archive handle returned by np.load() is closed before returning.'''
    # NOTE(review): the 'info', 'frames' and 'pedigree_graph' entries are unwrapped with [0],
    # which suggests pickled object arrays - presumably only trusted files should be read
    # here, and newer numpy versions may require allow_pickle=True; confirm.
    files = np.load(in_file)
    try:
        # Pedigree
        graph = nx.DiGraph()
        graph.add_nodes_from(files['pedigree_nodes'])
        graph.add_edges_from(files['pedigree_graph'][0])
        p = Pedigree(graph,
                     sample_id=files['pedigree_sample_id'],
                     sex=files['pedigree_sex'],
                     phenotype=files['pedigree_phenotype'],
                     node_type=files['pedigree_node_type'],
                     sample_index=files['pedigree_sample_index'],
                     num_genotyped=files['pedigree_num_genotyped'][0])

        # Genotypes and haplotypes
        g = GenotypeFactory.new_instance('genotype', files['genotype_data'],
                                         files['genotype_snp'])
        h = GenotypeFactory.new_instance('haplotype',
                                         files['haplotype_data'],
                                         files['haplotype_snp'],
                                         qc=MISSING)
        error = files['error']
        # The haplotype object is created with qc=MISSING and then given the stored QC data.
        h.qc = files['haplotype_qc']
        info = files['info'][0]

        # Read the frames entry once; every archive access re-reads it from the file.
        frame_dict = files['frames'][0]
        if frame_dict:
            # items() instead of iteritems(): works on both Python 2 and 3.
            frames = Frames((k, w) for k, v in frame_dict.items() for w in v[0])
        else:
            frames = None
        lam = files['lam']

        # Optional fields
        stored = files.files
        if 'genotype_map' in stored:
            g.map = files['genotype_map']
        if 'haplotype_poo_phase' in stored:
            h.poo_phase = files['haplotype_poo_phase']
        if 'haplotype_hap_type' in stored:
            h.hap_type = files['haplotype_hap_type']

        return Problem(p,
                       g,
                       haplotype=h,
                       error=error,
                       info=info,
                       frames=frames,
                       lam=lam)
    finally:
        # The NpzFile keeps the archive open until it is explicitly closed.
        files.close()
Ejemplo n.º 7
0
Archivo: io.py Proyecto: orenlivne/ober
def read_plink(**kwargs):
    '''Load a problem from the following PLINK files:
    
        Default          Override Option    Data                                Format
        ======================================================================================
        prefix.pdg.tfam  pedigree           Pedigree adjacency                  PLINK TFAM
                                            (genotyped+nongenotyped samples)
        prefix.tfam      pedigree_genotyped Genotyped sample pedigree
                                            (sub-graph of the pedigree)         PLINK TFAM
                                            corresponding to prefix.tped
        prefix.tped      genotype           Genotype data                       PLINK TPED
        prefix.hap.tped  haplotype*         Haplotype data                      PLINK TPED 
        prefix.err       error**            Genotype errors flagged             Integer array (snps x samples) 
        prefix.info      info               Problem info                        pickle (binary)
        prefix.frm       frames             LD-independent SNP frames           text file
        prefix.lam       lam***             Haplotype est. recombination rate   text file
        
        * - hap data not loaded if this option is None.
        ** - error is passed to Problem as None if this file is not found or this option is set to None.
        *** - data not loaded if this file is not found.

    Keyword arguments: 'prefix' (default ''), 'verbose' (default False) and the override
    options listed above. The prefix-based defaults apply only when a prefix is given;
    otherwise an option that is not passed is None. Problem info is read only when both
    the haplotype and info options are set.

    Returns the loaded Problem.

    Raises ValueError if no pedigree or no genotype location is available.
    '''

    # Read input options
    verbose = kwargs.get('verbose', False)
    prefix = kwargs.get('prefix', '')

    def overrideable_option(name, default):
        # The prefix-derived default is only meaningful when a prefix was supplied.
        return kwargs.get(name, default if prefix else None)

    def print_location(x):
        # Placeholder shown in verbose output for options that are not set.
        return x if x else '-'

    pedigree = overrideable_option('pedigree', prefix + '.pdg.tfam')
    pedigree_genotyped = overrideable_option('pedigree_genotyped', prefix + '.tfam')
    genotype = overrideable_option('genotype', prefix + '.tped')
    haplotype = overrideable_option('haplotype', prefix + '.hap.tped')
    error_file = overrideable_option('error', prefix + '.err')
    info = overrideable_option('info', prefix + '.info')
    frames_file = overrideable_option('frames', prefix + '.frm')
    lam_file = overrideable_option('lam', prefix + '.lam')

    # The previous check compared a list literal against None, which is always true, so it
    # never fired. Only the two inputs without which nothing can be loaded are enforced:
    # the error and haplotype options are documented above as allowed to be None.
    if pedigree is None or genotype is None:
        raise ValueError('Must specify a prefix or pedigree, pedigree_genotyped, genotype, error files')

    # Load data. print(...) with a single argument behaves the same on Python 2 and 3.
    if verbose:
        print('Reading pedigree from %s, %s ...' % (print_location(pedigree), print_location(pedigree_genotyped),))
    p = io_pedigree.read(pedigree, genotyped_id_file=pedigree_genotyped)

    if verbose:
        print('Reading genotype data from %s ...' % (print_location(genotype),))
    g = io_genotype.read('plink', 'genotype', tped=genotype, load_ids=False)

    if verbose:
        print('Reading haplotype data from %s ...' % (print_location(haplotype),))
    h = io_genotype.read('plink', 'haplotype', tped=haplotype, load_ids=False) if haplotype else None

    if verbose:
        print('Reading error data from %s ...' % (print_location(error_file),))
    error = np.loadtxt(error_file) if error_file and os.path.isfile(error_file) else None

    if verbose:
        print('Reading frame data from %s ...' % (print_location(frames_file),))
    frames = db_gene.snp.ld_graph.read_frames(frames_file) if frames_file else None

    lam = np.loadtxt(lam_file) if lam_file and os.path.isfile(lam_file) else None

    problem = Problem(p, g, haplotype=h, error=error, frames=frames, lam=lam)
    if haplotype and info:
        if verbose:
            print('Reading problem info from %s ...' % (info,))
        # SECURITY NOTE: pickle.load executes arbitrary code from the file; only read
        # info files that come from a trusted source.
        with open(info, 'rb') as info_file:
            problem.info = pickle.load(info_file)
    return problem
Ejemplo n.º 8
0
class Experiment(object):
    '''A validation experiment: start with a Problem object, clear a certain portion of the
    data, run a phaser, and cross-check the hap results against the original genotype data.'''
    #---------------------------------------------
    # Constructors
    #---------------------------------------------
    def __init__(self, problem, fraction=None, test_index=None):
        '''Initialize an experiment to be run on a problem, clearing out 'fraction' of the data. If test_index
        is specified, these specific test indices are used; otherwise a random fraction is generated.
        
        If test_index = 'hap', data is read from problem.h (haplotype array). The entire array
        is considered as a test array, but nothing is zeroed out. Useful for phasing result stats.

        Raises ValueError unless exactly one of fraction and test_index is specified.'''
        # Exactly one of the two selectors must be given.
        if not (fraction is not None) ^ (test_index is not None):
            raise ValueError('Must specify fraction or test_index')
        # Create a working copy of the problem. Only the data is copied.
        self.problem = Problem(problem.pedigree, problem.genotype.copy())
        self.h = self.problem.h
        
        # Create test set; save original genotypes in g_orig
        if test_index is None:
            self.fraction = fraction
            self.g_orig, i = clear_random_portion(self.problem.genotype.data, fraction)
        elif test_index == 'hap':
            # Don't clear anything; call everything a test index.
            h = problem.h
            i = tuple(util.flattened_meshgrid(range(h.shape[0]), range(h.shape[1])))
            self.g_orig = problem.g
            self.h = h
            self.fraction = 1.0
        else:
            self.g_orig, i = clear_index(self.problem.g, test_index)
            # Ratio of test entries to the product of the first two dimensions of h.
            self.fraction = (1.0 * i[0].size) / (self.h.shape[0] * self.h.shape[1])
        self.num_tests = i[0].size
        self.test_index = i
        self.r_orig = recode.recode_single_genotype(self.g_orig)
        self.fill = self.problem.fill_fraction()[:, SAMPLE]
        # Cache for the recoded_genotype property; filled on first access.
        self.__recode_single_genotype = None
    
    #---------------------------------------------
    # Methods
    #---------------------------------------------
    def __repr__(self):
        '''Return a short description: the problem's repr and the test fraction as a percentage.'''
        # self.fraction is a ratio (1.0 means the entire array in 'hap' mode), so it has to be
        # scaled by 100 before being printed with a percent sign.
        return 'Experiment[%s, fraction=%.2f%%]' % (repr(self.problem), 100.0 * self.fraction) 
    
    def run(self, phaser, params=None):
        '''Run phaser (or more generally, a handler) on the working problem and refresh the
        fill data and the cached recoded genotypes.'''
        phaser.run(self.problem, params=params)
        # Any recoding cached before the run was computed from the pre-run haplotypes;
        # drop it so that the call/error statistics are recomputed.
        self.__recode_single_genotype = None
        # NOTE(review): column 1 presumably equals SAMPLE, which __init__ uses - confirm.
        self.fill = self.problem.fill_fraction()[:, 1]
    
    def num_test_genotypes(self, field):
        '''Return the number of test genotypes, broken by field (SNP=0, sample=1).'''
        return self.__group_by_field(np.arange(len(self.test_index[0])), field)
    
    def where_called(self):
        '''Return the indices of genotypes in which both alleles were called.''' 
        return recode.where_called(self.recoded_genotype)[0]

    def called(self, field):
        '''Return the number of genotypes in which both alleles were called, broken by field (SNP=0, sample=1).'''
        return self.__group_by_field(self.where_called(), field)

    #---------------------------------------------
    # Properties
    #---------------------------------------------
    @property
    def test_orig(self):
        '''Return the recoded entries of the haplotype array self.h at the test indices.'''
        # NOTE(review): despite the name this reads the current self.h, not g_orig/r_orig - confirm intent.
        i = self.test_index
        return recode.recode_single_genotype(self.h[i[SNP], i[SAMPLE], :])

    @property
    def test_called(self):
        '''Return the called set of haplotypes corresponding to the test genotypes.'''
        i = self.test_index
        return self.h[i[SNP], i[SAMPLE], :]
    
    @property
    def recoded_genotype(self):
        '''Return the genotype test set, recoded as a single number of allele pair.

        The result is computed on first access and cached until the next run().'''
        if self.__recode_single_genotype is None:
            self.__recode_single_genotype = recode.recode_single_genotype(self.test_called) 
        return self.__recode_single_genotype

    @property    
    def total_called(self):
        '''Return the number of genotypes in which both alleles were called.''' 
        return len(self.where_called())

    @property    
    def total_partial_called(self):
        '''Return the number of genotypes in which one allele was called.''' 
        return len(recode.where_partial_called(self.recoded_genotype)[0])
    
    @property    
    def total_errors(self):
        '''Return the number of genotypes that were called incorrectly. (A genotype is an allele pair.)''' 
        # Count entries that were called AND are different than the corresponding original value
        return len(recode.where_error(self.recoded_genotype, self.r_orig)[0])
    
    @property    
    def total_partial_errors(self):
        '''Return the number of genotypes that were partially called incorrectly.''' 
        # This happens when hap=(0,1), genotype=(2,2) or hap(0,2), genotype=(1,1)
        return len(recode.where_partial_error(self.recoded_genotype, self.r_orig)[0])

    @property
    def full_call_fraction(self):
        '''Return the fraction of test genotypes in which both alleles were called.''' 
        return (1.0 * self.total_called) / self.num_tests
    
    @property
    def partial_call_fraction(self):
        '''Return the fraction of test genotypes in which one allele was called.''' 
        return (1.0 * self.total_partial_called) / self.num_tests

    @property
    def full_error_fraction(self):
        '''Return the fraction of erroneously fully-called test genotypes.''' 
        return (1.0 * self.total_errors) / self.num_tests

    @property
    def partial_error_fraction(self):
        '''Return the fraction of erroneously half-called test genotypes.''' 
        return (1.0 * self.total_partial_errors) / self.num_tests

    @property
    def stats(self):
        '''Return a tuple of experiment statistics: test fraction, full and partial call
        fractions, full and partial error fractions.'''
        return (self.fraction,
                self.full_call_fraction,
                self.partial_call_fraction,
                self.full_error_fraction,
                self.partial_error_fraction)
        
    #---------------------------------------------
    # Private Methods
    #---------------------------------------------
    def __group_by_field(self, i, field):
        '''Group a test index subset i by field (SNP=0, sample=1).

        Returns an integer array whose length is the genotype data size along 'field';
        entries not present in the grouping stay 0.'''
        size = self.problem.genotype.data.shape[field]
        # Presumably 'k' holds the distinct field values and 'v' their counts - confirm in util/statutil.
        group_count = util.dict_to_array(statutil.group_by_value(self.test_index[field][i]))
        result = np.zeros((size,), dtype=int)
        result[group_count['k']] = group_count['v']
        return result
Ejemplo n.º 9
0
class Experiment(object):
    '''A validation experiment: start with a Problem object, clear a certain portion of the
    data, run a phaser, and cross-check the hap results against the original genotype data.'''

    #---------------------------------------------
    # Constructors
    #---------------------------------------------
    def __init__(self, problem, fraction=None, test_index=None):
        '''Initialize an experiment to be run on a problem, clearing out 'fraction' of the data. If test_index
        is specified, these specific test indices are used; otherwise a random fraction is generated.
        
        If test_index = 'hap', data is read from problem.h (haplotype array). The entire array
        is considered as a test array, but nothing is zeroed out. Useful for phasing result stats.

        Raises ValueError unless exactly one of fraction and test_index is specified.'''
        # Exactly one of the two selectors must be given.
        if not (fraction is not None) ^ (test_index is not None):
            raise ValueError('Must specify fraction or test_index')
        # Create a working copy of the problem. Only the data is copied.
        self.problem = Problem(problem.pedigree, problem.genotype.copy())
        self.h = self.problem.h

        # Create test set; save original genotypes in g_orig
        if test_index is None:
            self.fraction = fraction
            self.g_orig, i = clear_random_portion(self.problem.genotype.data,
                                                  fraction)
        elif test_index == 'hap':
            # Don't clear anything; call everything a test index.
            h = problem.h
            i = tuple(
                util.flattened_meshgrid(range(h.shape[0]), range(h.shape[1])))
            self.g_orig = problem.g
            self.h = h
            self.fraction = 1.0
        else:
            self.g_orig, i = clear_index(self.problem.g, test_index)
            # Ratio of test entries to the product of the first two dimensions of h.
            self.fraction = (1.0 * i[0].size) / (self.h.shape[0] *
                                                 self.h.shape[1])
        self.num_tests = i[0].size
        self.test_index = i
        self.r_orig = recode.recode_single_genotype(self.g_orig)
        self.fill = self.problem.fill_fraction()[:, SAMPLE]
        # Cache for the recoded_genotype property; filled on first access.
        self.__recode_single_genotype = None

    #---------------------------------------------
    # Methods
    #---------------------------------------------
    def __repr__(self):
        '''Return a short description: the problem's repr and the test fraction as a percentage.'''
        # self.fraction is a ratio (1.0 means the entire array in 'hap' mode), so it has to be
        # scaled by 100 before being printed with a percent sign.
        return 'Experiment[%s, fraction=%.2f%%]' % (repr(
            self.problem), 100.0 * self.fraction)

    def run(self, phaser, params=None):
        '''Run phaser (or more generally, a handler) on the working problem and refresh the
        fill data and the cached recoded genotypes.'''
        phaser.run(self.problem, params=params)
        # Any recoding cached before the run was computed from the pre-run haplotypes;
        # drop it so that the call/error statistics are recomputed.
        self.__recode_single_genotype = None
        # NOTE(review): column 1 presumably equals SAMPLE, which __init__ uses - confirm.
        self.fill = self.problem.fill_fraction()[:, 1]

    def num_test_genotypes(self, field):
        '''Return the number of test genotypes, broken by field (SNP=0, sample=1).'''
        return self.__group_by_field(np.arange(len(self.test_index[0])), field)

    def where_called(self):
        '''Return the indices of genotypes in which both alleles were called.'''
        return recode.where_called(self.recoded_genotype)[0]

    def called(self, field):
        '''Return the number of genotypes in which both alleles were called, broken by field (SNP=0, sample=1).'''
        return self.__group_by_field(self.where_called(), field)

    #---------------------------------------------
    # Properties
    #---------------------------------------------
    @property
    def test_orig(self):
        '''Return the recoded entries of the haplotype array self.h at the test indices.'''
        # NOTE(review): despite the name this reads the current self.h, not g_orig/r_orig - confirm intent.
        i = self.test_index
        return recode.recode_single_genotype(self.h[i[SNP], i[SAMPLE], :])

    @property
    def test_called(self):
        '''Return the called set of haplotypes corresponding to the test genotypes.'''
        i = self.test_index
        return self.h[i[SNP], i[SAMPLE], :]

    @property
    def recoded_genotype(self):
        '''Return the genotype test set, recoded as a single number of allele pair.

        The result is computed on first access and cached until the next run().'''
        if self.__recode_single_genotype is None:
            self.__recode_single_genotype = recode.recode_single_genotype(
                self.test_called)
        return self.__recode_single_genotype

    @property
    def total_called(self):
        '''Return the number of genotypes in which both alleles were called.'''
        return len(self.where_called())

    @property
    def total_partial_called(self):
        '''Return the number of genotypes in which one allele was called.'''
        return len(recode.where_partial_called(self.recoded_genotype)[0])

    @property
    def total_errors(self):
        '''Return the number of genotypes that were called incorrectly. (A genotype is an allele pair.)'''
        # Count entries that were called AND are different than the corresponding original value
        return len(recode.where_error(self.recoded_genotype, self.r_orig)[0])

    @property
    def total_partial_errors(self):
        '''Return the number of genotypes that were partially called incorrectly.'''
        # This happens when hap=(0,1), genotype=(2,2) or hap(0,2), genotype=(1,1)
        return len(
            recode.where_partial_error(self.recoded_genotype, self.r_orig)[0])

    @property
    def full_call_fraction(self):
        '''Return the fraction of test genotypes in which both alleles were called.'''
        return (1.0 * self.total_called) / self.num_tests

    @property
    def partial_call_fraction(self):
        '''Return the fraction of test genotypes in which one allele was called.'''
        return (1.0 * self.total_partial_called) / self.num_tests

    @property
    def full_error_fraction(self):
        '''Return the fraction of erroneously fully-called test genotypes.'''
        return (1.0 * self.total_errors) / self.num_tests

    @property
    def partial_error_fraction(self):
        '''Return the fraction of erroneously half-called test genotypes.'''
        return (1.0 * self.total_partial_errors) / self.num_tests

    @property
    def stats(self):
        '''Return a tuple of experiment statistics: test fraction, full and partial call
        fractions, full and partial error fractions.'''
        return (self.fraction, self.full_call_fraction,
                self.partial_call_fraction, self.full_error_fraction,
                self.partial_error_fraction)

    #---------------------------------------------
    # Private Methods
    #---------------------------------------------
    def __group_by_field(self, i, field):
        '''Group a test index subset i by field (SNP=0, sample=1).

        Returns an integer array whose length is the genotype data size along 'field';
        entries not present in the grouping stay 0.'''
        size = self.problem.genotype.data.shape[field]
        # Presumably 'k' holds the distinct field values and 'v' their counts - confirm in util/statutil.
        group_count = util.dict_to_array(
            statutil.group_by_value(self.test_index[field][i]))
        result = np.zeros((size, ), dtype=int)
        result[group_count['k']] = group_count['v']
        return result
Ejemplo n.º 10
0
Archivo: io.py Proyecto: orenlivne/ober
def read_plink(**kwargs):
    '''Load a problem from the following PLINK files:
    
        Default          Override Option    Data                                Format
        ======================================================================================
        prefix.pdg.tfam  pedigree           Pedigree adjacency                  PLINK TFAM
                                            (genotyped+nongenotyped samples)
        prefix.tfam      pedigree_genotyped Genotyped sample pedigree
                                            (sub-graph of the pedigree)         PLINK TFAM
                                            corresponding to prefix.tped
        prefix.tped      genotype           Genotype data                       PLINK TPED
        prefix.hap.tped  haplotype*         Haplotype data                      PLINK TPED 
        prefix.err       error**            Genotype errors flagged             Integer array (snps x samples) 
        prefix.info      info               Problem info                        pickle (binary)
        prefix.frm       frames             LD-independent SNP frames           text file
        prefix.lam       lam***             Haplotype est. recombination rate   text file
        
        * - hap data not loaded if this option is None.
        ** - error is passed to Problem as None if this file is not found or this option is set to None.
        *** - data not loaded if this file is not found.

    Keyword arguments: 'prefix' (default ''), 'verbose' (default False) and the override
    options listed above. The prefix-based defaults apply only when a prefix is given;
    otherwise an option that is not passed is None. Problem info is read only when both
    the haplotype and info options are set.

    Returns the loaded Problem.

    Raises ValueError if no pedigree or no genotype location is available.
    '''

    # Read input options
    verbose = kwargs.get('verbose', False)
    prefix = kwargs.get('prefix', '')

    def overrideable_option(name, default):
        # The prefix-derived default is only meaningful when a prefix was supplied.
        return kwargs.get(name, default if prefix else None)

    def print_location(x):
        # Placeholder shown in verbose output for options that are not set.
        return x if x else '-'

    pedigree = overrideable_option('pedigree', prefix + '.pdg.tfam')
    pedigree_genotyped = overrideable_option('pedigree_genotyped',
                                             prefix + '.tfam')
    genotype = overrideable_option('genotype', prefix + '.tped')
    haplotype = overrideable_option('haplotype', prefix + '.hap.tped')
    error_file = overrideable_option('error', prefix + '.err')
    info = overrideable_option('info', prefix + '.info')
    frames_file = overrideable_option('frames', prefix + '.frm')
    lam_file = overrideable_option('lam', prefix + '.lam')

    # The previous check compared a list literal against None, which is always true, so it
    # never fired. Only the two inputs without which nothing can be loaded are enforced:
    # the error and haplotype options are documented above as allowed to be None.
    if pedigree is None or genotype is None:
        raise ValueError(
            'Must specify a prefix or pedigree, pedigree_genotyped, genotype, error files'
        )

    # Load data. print(...) with a single argument behaves the same on Python 2 and 3.
    if verbose:
        print('Reading pedigree from %s, %s ...' % (
            print_location(pedigree),
            print_location(pedigree_genotyped),
        ))
    p = io_pedigree.read(pedigree, genotyped_id_file=pedigree_genotyped)

    if verbose:
        print('Reading genotype data from %s ...' % (
            print_location(genotype), ))
    g = io_genotype.read('plink', 'genotype', tped=genotype, load_ids=False)

    if verbose:
        print('Reading haplotype data from %s ...' % (
            print_location(haplotype), ))
    h = io_genotype.read('plink', 'haplotype', tped=haplotype,
                         load_ids=False) if haplotype else None

    if verbose:
        print('Reading error data from %s ...' % (print_location(error_file), ))
    error = np.loadtxt(
        error_file) if error_file and os.path.isfile(error_file) else None

    if verbose:
        print('Reading frame data from %s ...' % (
            print_location(frames_file), ))
    frames = db_gene.snp.ld_graph.read_frames(
        frames_file) if frames_file else None

    lam = np.loadtxt(
        lam_file) if lam_file and os.path.isfile(lam_file) else None

    problem = Problem(p, g, haplotype=h, error=error, frames=frames, lam=lam)
    if haplotype and info:
        if verbose:
            print('Reading problem info from %s ...' % (info, ))
        # SECURITY NOTE: pickle.load executes arbitrary code from the file; only read
        # info files that come from a trusted source.
        with open(info, 'rb') as info_file:
            problem.info = pickle.load(info_file)
    return problem