Python smartopenの例、pydigree.io.smartopen.smartopen Pythonの例

コード例 #1

0

ファイルを表示

def test_smartopen():
    """Check that smartopen returns text lines for plain and compressed files.

    Every fixture is expected to hold the same three lines, so each format
    is checked with the same pair of assertions.
    """
    from pydigree.io.smartopen import smartopen

    datadir = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                           'test_data', 'compression')
    expected = ['genetics', 'pydigree', 'dna']

    # One fixture per format: plain text, gzip, bz2, xz, lzma.
    # NOTE(review): the bz2 case used to re-open 'test.gz', so bz2 handling
    # was never exercised. This assumes a 'test.bz2' fixture sits next to
    # the other files -- confirm it is checked in.
    fixtures = ['test', 'test.gz', 'test.bz2', 'test.xz', 'test.lzma']

    for fixture in fixtures:
        with smartopen(os.path.join(datadir, fixture)) as f:
            d = f.readlines()
        # The fixture name is the assertion message so a failure says
        # which format broke.
        assert all(isinstance(x, str) for x in d), fixture
        assert [x.strip() for x in d] == expected, fixture

コード例 #2

0

ファイルを表示

ファイル: test_io.py プロジェクト: y-chai/pydigree

def test_smartopen():
    """Verify smartopen yields decoded text for each supported file format.

    The plain-text, gzip, bz2, xz and lzma fixtures are all expected to
    contain the same three lines.
    """
    from pydigree.io.smartopen import smartopen

    here = os.path.dirname(os.path.abspath(__file__))
    datadir = os.path.join(here, 'test_data', 'compression')

    # NOTE(review): the section labelled "bz2" previously opened 'test.gz'
    # a second time, leaving bz2 untested. 'test.bz2' is assumed to exist
    # alongside the other fixtures -- confirm before merging.
    for name in ('test', 'test.gz', 'test.bz2', 'test.xz', 'test.lzma'):
        with smartopen(os.path.join(datadir, name)) as f:
            d = f.readlines()

        # Lines must come back as str, whatever the compression.
        assert all(isinstance(x, str) for x in d), name
        assert [x.strip() for x in d] == ['genetics', 'pydigree', 'dna'], name

コード例 #3

0

ファイルを表示

ファイル: simulation.py プロジェクト: y-chai/pydigree

 def _writeibd(self, replicatenumber):
     """
     Write per-locus sharing states for every pair of individuals.

     Warning: Don't call this function! If the individuals in the pedigree
     dont have LABEL genotypes, you're just going to get IBS configurations
     at each locus, not actual IBD calculations. If you have data you want
     to identify IBD segments in, check pydigree.sgs

     The output file is named '<label>-<replicatenumber + 1>.ibd.gz'. Each
     line holds the pedigree label, the two individual labels and then one
     value per locus across all chromosomes, separated by single spaces.

     :param replicatenumber: zero-based replicate index
     """
     outname = '{0}-{1}.ibd.gz'.format(self.label, replicatenumber + 1)
     with smartopen(outname, 'w') as of:
         for ped in self.template.pedigrees:
             pairs = combinations_with_replacement(ped.individuals, 2)
             for ind1, ind2 in pairs:
                 states = []
                 for chrom_idx in range(ind1.chromosomes.nchrom()):
                     if ind1 == ind2:
                         # Self pair: 2 where the individual's two alleles
                         # match at a locus, 0 otherwise
                         own = zip(*ind1.genotypes[chrom_idx])
                         states.extend([2 * (x == y) for x, y in own])
                     else:
                         first = zip(*ind1.genotypes[chrom_idx])
                         second = zip(*ind2.genotypes[chrom_idx])
                         states.extend(
                             [ibs(g1, g2) for g1, g2 in zip(first, second)])
                 fields = [ped.label, ind1.label, ind2.label] + states
                 text = ' '.join([str(x) for x in fields])
                 of.write('{}\n'.format(text))

コード例 #4

0

ファイルを表示

ファイル: base.py プロジェクト: jameshicks/pydigree

def write_phenotypes(pedigrees, filename, predicate=None,
                     missingcode='X', delim=','):
    """
    Writes phenotypes to a CSV (or other field delimited) file

    The first line is a header: ``famid``, ``id`` and then every phenotype
    name found on the selected individuals, in sorted order. Each following
    line holds one individual's population label, label and phenotype
    values.

    :param pedigrees: Data to write
    :param filename: filename to write to
    :param predicate: optional callable taking an individual; only
        individuals for which it returns a true value are written. Any
        non-callable value is ignored and all individuals are written.
    :param missingcode: code to use for missing values (phenotypes that are
        absent or stored as None)
    :param delim: output field separator

    :type missingcode: string
    :type delim: string
    """
    inds = pedigrees.individuals

    if callable(predicate):
        inds = [x for x in inds if predicate(x)]

    # set().union(*...) also copes with an empty selection, where reduce()
    # without an initial value raises TypeError.
    available_phenotypes = sorted(
        set().union(*(x.phenotypes.keys() for x in inds)))
    header = ['famid', 'id'] + available_phenotypes

    with smartopen(filename, 'w') as ofile:
        ofile.write(delim.join([str(x) for x in header]) + '\n')
        for ind in inds:
            row = [ind.population.label, ind.label]
            for phenotype in available_phenotypes:
                value = ind.phenotypes.get(phenotype)
                # A stored None means "missing" as well; emit the missing
                # code rather than the literal text 'None'.
                row.append(missingcode if value is None else value)
            ofile.write(delim.join([str(x) for x in row]) + '\n')

コード例 #5

0

ファイルを表示

ファイル: base.py プロジェクト: jameshicks/pydigree

def read_phenotypes(pedigrees, csvfile, delimiter=',', missingcode='X'):
    """
    Loads phenotype values from a delimited file with a header line, e.g.
    famid, id, phen, phen, phen, phen etc etc

    The ``famid`` and ``id`` columns identify the individual; every other
    column is stored under its header name in that individual's phenotypes.
    Values that parse as floats are stored as floats, empty values and
    values equal to ``missingcode`` are stored as None, and anything else
    is kept as the raw string.

    Arguments
    :param pedigrees:   data to update
    :param csvfile:     the filename of the file containing phenotypes.
    :param delimiter:   the field delimiter for the file
    :param missingcode: the code for missing values
    :type pedigrees: PedigreeCollection
    :type csvfile: string
    :type missingcode: string

    :rtype: void
    """
    id_columns = ('famid', 'id')

    with smartopen(csvfile) as f:
        header = f.readline().strip().split(delimiter)
        for line in f:
            # Pair each field with the name of its column
            record = dict(zip(header, line.strip().split(delimiter)))
            for column, raw in record.items():
                # Identifier columns are not phenotypes
                if column in id_columns:
                    continue

                try:
                    value = float(raw)
                except ValueError:
                    # Not numeric: either a missing marker or free text
                    value = None if (not raw or raw == missingcode) else raw

                fam, ind = record['famid'], record['id']
                pedigrees[fam][ind].phenotypes[column] = value

コード例 #6

0

ファイルを表示

ファイル: plink.py プロジェクト: jameshicks/pydigree

def write_map(pedigrees, mapfile, output_chromosomes=None):
    '''
    Writes the genotype location data to a PLINK MAP file

    Each marker becomes one tab-separated line: chromosome output label,
    marker label, genetic position and physical position. Markers without
    a label are named 'SNP<chromosome>-<index>'.

    :param pedigrees: the population containing the data to be written
    :param mapfile: the name of the file to be output to
    :param output_chromosomes: which chromosomes to write, as a container
        of chromosome output labels. None writes every chromosome.

    Returns: Nothing
    '''
    with smartopen(mapfile, 'w') as f:
        for chrom in pedigrees.chromosomes:
            # Skip chromosomes the caller did not ask for
            if (output_chromosomes is not None
                    and chrom.outputlabel not in output_chromosomes):
                continue
            for mi, (label, cm, mb, _) in enumerate(chrom.iterinfo()):
                if not mb:
                    # No physical position: estimate one from the genetic
                    # position with the usual 1 cM ~ 1 Mb (1e6 bp) rule of
                    # thumb. The previous factor, 10e6, is 1e7 and gave
                    # positions ten times larger than that estimate.
                    mb = int(cm * 1e6)
                if not label:
                    label = 'SNP%s-%s' % (chrom.outputlabel, mi)

                rec = [chrom.outputlabel, label, cm, mb]
                f.write('\t'.join(str(x) for x in rec) + '\n')

コード例 #7

0

ファイルを表示

ファイル: plink.py プロジェクト: y-chai/pydigree

def write_map(pedigrees, mapfile, output_chromosomes=None):
    '''
    Writes the genotype location data to a PLINK MAP file

    One tab-separated line is written per marker: chromosome output label,
    marker label, genetic position, physical position. A marker with no
    label is written as 'SNP<chromosome>-<index>'.

    :param pedigrees: the population containing the data to be written
    :param mapfile: the name of the file to be output to
    :param output_chromosomes: container of chromosome output labels to
        write; when None, all chromosomes are written

    Returns: Nothing
    '''
    # Only filter when the caller actually supplied a selection
    checkchroms = output_chromosomes is not None

    with smartopen(mapfile, 'w') as f:
        for chrom in pedigrees.chromosomes:
            if checkchroms and chrom.outputlabel not in output_chromosomes:
                continue
            for mi, marker in enumerate(chrom.iterinfo()):
                label, cm, mb, _ = marker
                if not mb:
                    # Physical position unavailable: approximate it from
                    # the genetic position using 1 cM ~ 1 Mb (1e6 bp).
                    # The old multiplier 10e6 equals 1e7, i.e. ten times
                    # that rule of thumb.
                    mb = int(cm * 1e6)
                if not label:
                    label = 'SNP%s-%s' % (chrom.outputlabel, mi)

                rec = [chrom.outputlabel, label, cm, mb]
                outline = '\t'.join(str(x) for x in rec)
                f.write(outline + '\n')

コード例 #8

0

ファイルを表示

def read_gs_chromosome_template(templatef):
    """
    Reads a genomeSIMLA format chromosome template file

    The first line is used as the chromosome label and the second line is
    skipped. Every remaining non-blank line must have five
    whitespace-separated fields: marker label, an ignored field, a value
    passed as the first argument to ``add_genotype`` (presumably the minor
    allele frequency -- confirm), the distance from the previous marker,
    and the base-pair position.

    :param templatef: The filename of the template file
    :type templatef: string

    :rtype: A ChromosomeTemplate object corresponding to the file
    :raises ValueError: if a data line does not have exactly five fields or
        a numeric field cannot be parsed
    """
    with smartopen(templatef) as f:
        label = f.readline().strip()  # The label and
        f.readline()  # the number of markers, both of which we dont need.
        c = pydigree.ChromosomeTemplate(label=label)

        # genomeSIMLA chromosome files have marginal recombination probs
        # instead of map positions. We'll have to keep track of what the
        # last position was and add to it to get it into the shape we want
        # it to be in.
        last_cm = 0
        for line in f:
            fields = line.split()
            if not fields:
                # Skip any blank line, including whitespace-only and
                # '\r\n' ones, which a comparison with '\n' lets through
                # to fail on unpacking.
                continue
            marker_label, _, minf, cm, bp = fields
            bp = int(bp)
            cm = float(cm)
            last_cm += cm
            c.add_genotype(float(minf), last_cm, label=marker_label, bp=bp)
    return c

コード例 #9

0

ファイルを表示

ファイル: genomesimla.py プロジェクト: jameshicks/pydigree

def read_gs_chromosome_template(templatef):
    """
    Reads a genomeSIMLA format chromosome template file

    Line one supplies the chromosome label; line two is read and discarded.
    Each later non-blank line is split on whitespace into five fields:
    marker label, an unused field, the value handed to ``add_genotype`` as
    its first argument (presumably a minor allele frequency -- confirm),
    the increment over the previous marker's position, and the base-pair
    position.

    :param templatef: The filename of the template file
    :type templatef: string

    :rtype: A ChromosomeTemplate object corresponding to the file
    :raises ValueError: if a data line does not split into five fields or a
        numeric field is malformed
    """
    with smartopen(templatef) as f:
        label = f.readline().strip()  # The label and
        f.readline()  # the number of markers, both of which we dont need.
        c = pydigree.ChromosomeTemplate(label=label)

        # genomeSIMLA chromosome files have marginal recombination probs
        # instead of map positions. We'll have to keep track of what the
        # last position was and add to it to get it into the shape we want
        # it to be in.
        last_cm = 0
        for line in f:
            if not line.strip():
                # Blank lines of any kind are ignored; testing only for
                # '\n' let whitespace-only and '\r\n' lines reach the
                # unpacking below and raise ValueError.
                continue
            marker_label, _, minf, cm, bp = line.split()
            bp = int(bp)
            cm = float(cm)
            last_cm += cm
            c.add_genotype(float(minf), last_cm, label=marker_label, bp=bp)
    return c

コード例 #10

0

ファイルを表示

def read_phenotypes(pedigrees, csvfile, delimiter=',', missingcode='X'):
    """
    Reads a delimited file whose header looks like
    famid, id, phen, phen, phen, phen etc etc
    and attaches the phenotype columns to the matching individuals.

    Numeric values are stored as floats. Empty values and values equal to
    ``missingcode`` are stored as None. Other text is stored unchanged.

    Arguments
    :param pedigrees:   data to update
    :param csvfile:     the filename of the file containing phenotypes.
    :param delimiter:   the field delimiter for the file
    :param missingcode: the code for missing values
    :type pedigrees: PedigreeCollection
    :type csvfile: string
    :type missingcode: string

    :rtype: void
    """
    with smartopen(csvfile) as f:
        columns = f.readline().strip().split(delimiter)
        for line in f:
            values = line.strip().split(delimiter)
            # Map each column name to this line's value for it
            row = dict(zip(columns, values))

            for name in row:
                # famid and id locate the individual; they are not data
                if name == 'famid' or name == 'id':
                    continue

                text = row[name]
                try:
                    parsed = float(text)
                except ValueError:
                    if not text or text == missingcode:
                        parsed = None
                    else:
                        parsed = text

                fam, ind = row['famid'], row['id']
                pedigrees[fam][ind].phenotypes[name] = parsed

コード例 #11

0

ファイルを表示

def write_pedigree(pedigrees, filename, delim=' '):
    '''
    Writes pedigree to a LINKAGE formatted pedigree file

    One line is written per individual, ordered by population label, depth
    and label. The columns are: population label, individual label, father
    label, mother label ('0' for both parents of a founder), sex and a
    constant '-9'.

    NOTE(review): sex is written as '1' when ``ind.sex == 1`` and '0'
    otherwise -- confirm this is the coding downstream readers expect.

    :param pedigrees: Data to write
    :param filename: filename to write to
    :param delim: output field separator

    :rtype: void
    '''
    def sorting_key(ind):
        return (ind.population.label, ind.depth, ind.label)

    with smartopen(filename, 'w') as f:
        for ind in sorted(pedigrees.individuals, key=sorting_key):
            founder = ind.is_founder()
            oline = [
                ind.population.label, ind.label,
                '0' if founder else ind.father.label,
                '0' if founder else ind.mother.label,
                '1' if ind.sex == 1 else '0', '-9'
            ]

            # Labels are not guaranteed to be strings and str.join raises
            # TypeError on anything else, so convert every field first.
            oline = delim.join(str(x) for x in oline)

            f.write(oline + '\n')

コード例 #12

0

ファイルを表示

ファイル: base.py プロジェクト: jameshicks/pydigree

def write_pedigree(pedigrees, filename, delim=' '):
    '''
    Writes pedigree to a LINKAGE formatted pedigree file

    Individuals are written one per line, sorted by population label, then
    depth, then label. Each line holds the population label, the
    individual's label, the father's and mother's labels ('0' for
    founders), the sex column and a constant '-9'.

    NOTE(review): the sex column is '1' when ``ind.sex == 1`` and '0' for
    any other value -- verify against the intended sex coding.

    :param pedigrees: Data to write
    :param filename: filename to write to
    :param delim: output field separator

    :rtype: void
    '''
    def sorting_key(x):
        return (x.population.label, x.depth, x.label)

    with smartopen(filename, 'w') as f:
        for ind in sorted(pedigrees.individuals, key=sorting_key):
            is_founder = ind.is_founder()
            oline = [ind.population.label,
                     ind.label,
                     '0' if is_founder else ind.father.label,
                     '0' if is_founder else ind.mother.label,
                     '1' if ind.sex == 1 else '0',
                     '-9']

            # str.join only accepts strings; labels may be other types
            # (for example integers), so stringify each field.
            f.write(delim.join(map(str, oline)) + '\n')

コード例 #13

0

ファイルを表示

ファイル: simulation.py プロジェクト: jameshicks/pydigree

 def _writeibd(self, replicatenumber):
     """
     Dump locus-by-locus sharing values for all pairs of individuals.

     Warning: Don't call this function! If the individuals in the pedigree
     dont have LABEL genotypes, you're just going to get IBS configurations
     at each locus, not actual IBD calculations. If you have data you want
     to identify IBD segments in, check pydigree.sgs

     Output goes to '<label>-<replicatenumber + 1>.ibd.gz'. Every pair of
     individuals within a pedigree (each individual paired with itself
     included) produces one space-separated line: pedigree label, both
     individual labels, then one value per locus over all chromosomes.

     :param replicatenumber: zero-based replicate index
     """
     target = '{0}-{1}.ibd.gz'.format(self.label, replicatenumber + 1)
     with smartopen(target, 'w') as of:
         for ped in self.template.pedigrees:
             for ind1, ind2 in combinations_with_replacement(
                     ped.individuals, 2):
                 sharing = []
                 for chrom_idx in range(ind1.chromosomes.nchrom()):
                     if ind1 == ind2:
                         # Individual against itself: score 2 when both of
                         # its alleles at the locus are equal, else 0
                         loci = zip(*ind1.genotypes[chrom_idx])
                         per_locus = [2 * (x == y) for x, y in loci]
                     else:
                         loci1 = zip(*ind1.genotypes[chrom_idx])
                         loci2 = zip(*ind2.genotypes[chrom_idx])
                         per_locus = [ibs(g1, g2)
                                      for g1, g2 in zip(loci1, loci2)]
                     sharing += per_locus
                 record = [ped.label, ind1.label, ind2.label] + sharing
                 of.write('{}\n'.format(' '.join([str(x) for x in record])))

コード例 #14

0

ファイルを表示

def write_phenotypes(pedigrees,
                     filename,
                     predicate=None,
                     missingcode='X',
                     delim=','):
    """
    Writes phenotypes to a CSV (or other field delimited) file

    A header line (``famid``, ``id``, then the sorted names of every
    phenotype present on the selected individuals) is followed by one line
    per individual with its population label, label and phenotype values.

    :param pedigrees: Data to write
    :param filename: filename to write to
    :param predicate: optional callable applied to each individual; only
        those for which it returns a true value are written. Non-callable
        values are ignored.
    :param missingcode: code to use for missing values, i.e. phenotypes an
        individual lacks or has stored as None
    :param delim: output field separator

    :type missingcode: string
    :type delim: string
    """
    inds = pedigrees.individuals

    if callable(predicate):
        inds = [x for x in inds if predicate(x)]

    # Collect names with set.update so an empty selection just yields an
    # empty set; reduce(set.union, []) raises TypeError in that case.
    names = set()
    for x in inds:
        names.update(x.phenotypes.keys())
    available_phenotypes = sorted(names)
    header = ['famid', 'id'] + available_phenotypes

    with smartopen(filename, 'w') as ofile:
        ofile.write(delim.join([str(x) for x in header]) + '\n')
        for ind in inds:
            row = [ind.population.label, ind.label]
            for phenotype in available_phenotypes:
                value = ind.phenotypes.get(phenotype)
                # Treat a stored None like an absent phenotype so the file
                # carries the missing code and not the text 'None'.
                row.append(missingcode if value is None else value)
            row = delim.join([str(x) for x in row])
            ofile.write(row + '\n')

コード例 #15

0

ファイルを表示

ファイル: plink.py プロジェクト: y-chai/pydigree

def read_map(mapfile):
    """
    Reads a PLINK map file into a ChromosomeSet of ChromosomeTemplate
    objects

    Every non-blank line must have four whitespace-separated fields:
    chromosome, marker label, genetic position and base-pair position.
    Consecutive lines with the same chromosome form one template. An empty
    file gives an empty ChromosomeSet.

    :param mapfile: Path of the file to be read
    :type mapfile: string

    :rtype: a ChromosomeSet holding one ChromosomeTemplate per chromosome
    :raises FileFormatError: if a position is negative, or positions within
        a chromosome are not in ascending order
    """
    last_chr, last_pos = None, 0
    chroms = ChromosomeSet()
    chromosome = None
    with smartopen(mapfile) as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Blank lines carry no marker; unpacking them would raise
                continue
            chrom, label, cm, pos = fields
            cm, pos = float(cm), int(pos)
            if pos < 0:
                raise FileFormatError('Invalid position: {}'.format(pos))
            if chrom != last_chr:
                # If this happens, we've moved on to a new chromosome,
                # or we've just started. If we haven't just started, We'll
                # close up the old one. Testing the object instead of the
                # line index stays correct when leading lines are skipped.
                if chromosome is not None:
                    chromosome.finalize()
                    chroms.add_chromosome(chromosome)
                # Make the next chromosome
                chromosome = ChromosomeTemplate(label=chrom)
            elif pos < last_pos:
                raise FileFormatError('Map file not sorted')
            chromosome.add_genotype(None, cm, label=label, bp=pos)
            last_chr, last_pos = chrom, pos
    # A file with no markers never creates a template, so there is nothing
    # to close (previously this was an AttributeError on None).
    if chromosome is not None:
        chromosome.finalize()
        chroms.add_chromosome(chromosome)
    return chroms

コード例 #16

0

ファイルを表示

ファイル: plink.py プロジェクト: jameshicks/pydigree

def read_map(mapfile):
    """
    Reads a PLINK map file into a ChromosomeSet of ChromosomeTemplate
    objects

    Each non-blank line is split on whitespace into exactly four fields:
    chromosome, marker label, genetic position, base-pair position. A run
    of lines sharing a chromosome becomes one ChromosomeTemplate. A file
    containing no markers returns an empty ChromosomeSet.

    :param mapfile: Path of the file to be read
    :type mapfile: string

    :rtype: a ChromosomeSet with one ChromosomeTemplate per chromosome
    :raises FileFormatError: on a negative position, or when positions
        decrease within a chromosome
    """
    last_chr, last_pos = None, 0
    chroms = ChromosomeSet()
    chromosome = None

    def close_current():
        # Finish and store the template being built, if there is one
        if chromosome is not None:
            chromosome.finalize()
            chroms.add_chromosome(chromosome)

    with smartopen(mapfile) as f:
        for line in f:
            if not line.strip():
                # Ignore blank lines instead of failing to unpack them
                continue
            chrom, label, cm, pos = line.split()
            cm, pos = float(cm), int(pos)
            if pos < 0:
                raise FileFormatError('Invalid position: {}'.format(pos))
            if chrom != last_chr:
                # We've moved on to a new chromosome, or we've just
                # started. Close the old one (a no-op at the very start)
                # and make the next chromosome.
                close_current()
                chromosome = ChromosomeTemplate(label=chrom)
            elif pos < last_pos:
                raise FileFormatError('Map file not sorted')
            chromosome.add_genotype(None, cm, label=label, bp=pos)
            last_chr, last_pos = chrom, pos
    # Guarded by close_current(): an empty file leaves chromosome as None,
    # which used to raise AttributeError here.
    close_current()
    return chroms

コード例 #17

0

ファイルを表示

def read_ped(filename,
             population=None,
             delimiter=None,
             affected_labels=None,
             population_handler=None,
             data_handler=None,
             connect_inds=True,
             onlyinds=None):
    """
    Builds a pydigree PedigreeCollection from a plink format pedigree
    file, ie:

    ::
        familyid indid father mother sex whatever whatever whatever

    Individuals can optionally be assigned to a population. If you don't
    provide a population you can't simulate genotypes!

    Each line is parsed into a record, turned into an individual, and
    given an 'affected' phenotype looked up in ``affected_labels``
    (None when the label is not in the mapping). Extra data on the line,
    if any, is passed to ``data_handler``.

    :param filename: The file to be read
    :param population: The population to assign individuals to. A new
        Population is created when this is None
    :param delimiter: a string defining the field separator,
        default: any whitespace
    :param affected_labels: The labels that determine affection status.
        When empty or None, '1'/'U' map to 0, '2'/'A' map to 1 and
        'X'/'-9' map to None
    :param population_handler: a function to set up the population. It is
        called on the temporary pedigree and handed on to sort_pedigrees
    :param data_handler: a function to turn the
        data into useful individual information. Called as
        ``data_handler(individual, data)``
    :param connect_inds: build references between individuals. Requires all
        individuals be present in the file
    :param onlyinds: only include data for specified individuals

    :type filename: string
    :type population: Population
    :type delimiter: string
    :type affected_labels: dict (str -> value)
    :type data_handler: callable
    :type connect_inds: bool
    :type onlyinds: iterable


    :returns: individuals contained in the pedigree file
    :rtype: PedigreeCollection
    """

    def _do_nothing(*_ignored):
        # Stand-in used when a handler is not supplied or not callable
        return None

    affected_labels = affected_labels or {
        '1': 0,
        '2': 1,
        'A': 1,
        'U': 0,
        'X': None,
        '-9': None
    }

    if not isinstance(data_handler, Callable):
        data_handler = _do_nothing

    if not isinstance(population_handler, Callable):
        population_handler = _do_nothing

    if population is None:
        population = Population()
    p = Pedigree()

    population_handler(p)

    # Step 1: Read the data and create the individuals
    with smartopen(filename) as f:
        for line in f:
            rec = PEDRecord(line, delimiter)

            # With a non-empty onlyinds, drop everyone not listed in it
            wanted = not onlyinds or rec.ind_id in onlyinds
            if not wanted:
                continue

            ind = rec.create_individual(population)
            ind.pedigree = p
            ind.phenotypes['affected'] = affected_labels.get(rec.aff, None)
            p[ind.label] = ind

            if rec.data:
                data_handler(p[ind.label], rec.data)

    # Step 2: Create the between-individual relationships

    # Individuals so far only carry parent ids in their parent fields,
    # not references to the actual parent individuals
    if connect_inds:
        connect_individuals(p)

    # Step 3: Separate the individuals into pedigrees
    return sort_pedigrees(p.individuals, population_handler)

コード例 #18

0

ファイルを表示

ファイル: plink.py プロジェクト: jameshicks/pydigree

def write_ped(pedigrees, pedfile, delim=' ', predicate=None,
              output_chromosomes=None):
    """
    write_ped writes data in a plink-format PED file.

    Each individual that passes ``predicate`` becomes one line: pedigree
    label, individual label, father label, mother label ('0' when a parent
    is None), sex (1 when ``ind.sex == 0``, otherwise 2), affection status
    ('2' for 1, '1' for 0, '-9' for None), followed by the genotypes of
    each chromosome with the two chromatids combined by ``interleave``.

    :param pedigrees: An object of class PedigreeCollection containing what you
        want to output
    :param pedfile: a string giving the name out the file to output to.
    :param delim: Field seperator
    :param predicate: Which inputs to include in the output file. If not
        specified all are output. If the string is 'affected', only affected
        individuals are output. If the string is 'phenotyped', all individuals
        with phenotype information are output. Any other value of predicate
        must be a function to perform on the individual that evaluates to
        True/False for whether the individual should be output.
    :param output_chromosomes: if not None, only chromosomes whose
        outputlabel is in this container are written

    :raises ValueError: if predicate is not an accepted string or callable,
        or a chromosome is stored as SparseAlleles
    :raises KeyError: if an individual's 'affected' phenotype is absent or
        is not 1, 0 or None

    Returns: Nothing
    """

    # Check if we're only supposed to be outputting certain chromosomes
    checkchroms = output_chromosomes is not None

    if not predicate:
        predicate = lambda x: True
    elif predicate == 'affected':
        predicate = lambda x: x.phenotypes['affected'] == 1
    elif predicate == 'phenotyped':
        predicate = lambda x: x.phenotypes['affected'] in (0, 1)
    elif not callable(predicate):
        # The builtin replaces collections.Callable, which no longer
        # exists on Python 3.10+ and raised AttributeError here.
        raise ValueError('Not a valid predicate!')

    pheno_label = {1: '2', 0: '1', None: '-9'}

    def getlab(ind, default):
        """
        Gets the label of an individual, or return different value ind is None
        """
        return ind.label if ind is not None else default

    with smartopen(pedfile, 'w') as f:
        for pedigree in pedigrees.pedigrees:
            for ind in pedigree.individuals:
                if not predicate(ind):
                    continue

                # Prepare the 6-column identifier
                outline = [pedigree.label, ind.label,
                           getlab(ind.father, '0'),
                           getlab(ind.mother, '0'),
                           1 if ind.sex == 0 else 2,
                           pheno_label[ind.phenotypes['affected']]]
                # Make strings
                outline = list(map(str, outline))

                # Get the genotypes in the format we need them
                g = []
                for template, chromatids in zip(ind.chromosomes, ind.genotypes):
                    if checkchroms and template.outputlabel not in output_chromosomes:
                        continue
                    chroma, chromb = chromatids
                    if isinstance(chroma, SparseAlleles):
                        raise ValueError("Plink output not for Sparse Data")

                    ga = chroma.astype(str).tolist()
                    gb = chromb.astype(str).tolist()
                    gn = interleave(ga, gb)
                    g.extend(gn)

                outline.extend(g)

                # Write it out
                outline = delim.join(outline)
                f.write(outline)
                f.write('\n')

コード例 #19

0

ファイルを表示

ファイル: base.py プロジェクト: jameshicks/pydigree

def read_ped(filename, population=None, delimiter=None, affected_labels=None,
             population_handler=None, data_handler=None, connect_inds=True,
             onlyinds=None):
    """
    Reads a plink format pedigree file, ie:

    ::
        familyid indid father mother sex whatever whatever whatever

    and returns its individuals as a PedigreeCollection, optionally
    assigning them to a population. If you don't provide a population
    you can't simulate genotypes!

    For every line a record is parsed, an individual is created from it,
    and its 'affected' phenotype is set from ``affected_labels`` (None for
    labels missing from the mapping). Any remaining data on the line is
    handed to ``data_handler``.

    :param filename: The file to be read
    :param population: The population to assign individuals to; a fresh
        Population is used when None
    :param delimiter: a string defining the field separator,
        default: any whitespace
    :param affected_labels: The labels that determine affection status.
        Defaults to '1'/'U' -> 0, '2'/'A' -> 1, 'X'/'-9' -> None when
        empty or None
    :param population_handler: a function to set up the population; it is
        applied to the temporary pedigree and passed to sort_pedigrees
    :param data_handler: a function to turn the
        data into useful individual information, called with the
        individual and the record's data
    :param connect_inds: build references between individuals. Requires all
        individuals be present in the file
    :param onlyinds: only include data for specified individuals

    :type filename: string
    :type population: Population
    :type delimiter: string
    :type affected_labels: dict (str -> value)
    :type data_handler: callable
    :type connect_inds: bool
    :type onlyinds: iterable


    :returns: individuals contained in the pedigree file
    :rtype: PedigreeCollection
    """

    def _noop(*_unused):
        # Placeholder for handlers that were omitted or are not callable
        return None

    if not affected_labels:
        affected_labels = dict([('1', 0), ('2', 1), ('A', 1), ('U', 0),
                                ('X', None), ('-9', None)])

    data_handler = data_handler if isinstance(data_handler, Callable) else _noop

    if not isinstance(population_handler, Callable):
        population_handler = _noop

    if population is None:
        population = Population()
    p = Pedigree()

    population_handler(p)

    # Step 1: Read the data and create the individuals
    with smartopen(filename) as f:
        for line in f:
            rec = PEDRecord(line, delimiter)

            # When a non-empty onlyinds is given, skip unlisted individuals
            if onlyinds:
                if rec.ind_id not in onlyinds:
                    continue

            ind = rec.create_individual(population)
            ind.pedigree = p
            ind.phenotypes['affected'] = affected_labels.get(rec.aff, None)
            p[ind.label] = ind

            if not rec.data:
                continue
            data_handler(p[ind.label], rec.data)

    # Step 2: Create the between-individual relationships

    # At this point parent fields hold parent ids only, not references
    # to the parent individuals themselves
    if connect_inds:
        connect_individuals(p)

    # Step 3: Separate the individuals into pedigrees
    pc = sort_pedigrees(p.individuals, population_handler)
    return pc

コード例 #20

0

ファイルを表示

ファイル: plink.py プロジェクト: y-chai/pydigree

def write_ped(pedigrees,
              pedfile,
              delim=' ',
              predicate=None,
              output_chromosomes=None):
    """
    write_ped writes data in a plink-format PED file.

    One line is written for each individual accepted by ``predicate``. It
    starts with six identifier columns -- pedigree label, individual label,
    father label, mother label ('0' when the parent is None), sex (1 when
    ``ind.sex == 0``, else 2) and affection status ('2' for 1, '1' for 0,
    '-9' for None) -- and continues with each chromosome's genotypes, the
    two chromatids being combined by ``interleave``.

    :param pedigrees: An object of class PedigreeCollection containing what you
        want to output
    :param pedfile: a string giving the name out the file to output to.
    :param delim: Field seperator
    :param predicate: Which inputs to include in the output file. If not
        specified all are output. If the string is 'affected', only affected
        individuals are output. If the string is 'phenotyped', all individuals
        with phenotype information are output. Any other value of predicate
        must be a function to perform on the individual that evaluates to
        True/False for whether the individual should be output.
    :param output_chromosomes: container of chromosome output labels to
        write; None writes every chromosome

    :raises ValueError: if predicate is neither an accepted string nor
        callable, or a chromosome is stored as SparseAlleles
    :raises KeyError: if an individual's 'affected' phenotype is missing
        or is something other than 1, 0 or None

    Returns: Nothing
    """

    # Check if we're only supposed to be outputting certain chromosomes
    checkchroms = output_chromosomes is not None

    if not predicate:
        predicate = lambda x: True
    elif predicate == 'affected':
        predicate = lambda x: x.phenotypes['affected'] == 1
    elif predicate == 'phenotyped':
        predicate = lambda x: x.phenotypes['affected'] in (0, 1)
    elif not callable(predicate):
        # collections.Callable was removed in Python 3.10, so the old
        # isinstance check failed with AttributeError; use the builtin.
        raise ValueError('Not a valid predicate!')

    pheno_label = {1: '2', 0: '1', None: '-9'}

    def getlab(ind, default):
        """
        Gets the label of an individual, or return different value ind is None
        """
        return ind.label if ind is not None else default

    with smartopen(pedfile, 'w') as f:
        for pedigree in pedigrees.pedigrees:
            for ind in pedigree.individuals:
                if not predicate(ind):
                    continue

                # Prepare the 6-column identifier
                outline = [
                    pedigree.label, ind.label,
                    getlab(ind.father, '0'),
                    getlab(ind.mother, '0'), 1 if ind.sex == 0 else 2,
                    pheno_label[ind.phenotypes['affected']]
                ]
                # Make strings
                outline = list(map(str, outline))

                # Get the genotypes in the format we need them
                g = []
                for template, chromatids in zip(ind.chromosomes,
                                                ind.genotypes):
                    if checkchroms and template.outputlabel not in output_chromosomes:
                        continue
                    chroma, chromb = chromatids
                    if isinstance(chroma, SparseAlleles):
                        raise ValueError("Plink output not for Sparse Data")

                    ga = chroma.astype(str).tolist()
                    gb = chromb.astype(str).tolist()
                    gn = interleave(ga, gb)
                    g.extend(gn)

                outline.extend(g)

                # Write it out
                outline = delim.join(outline)
                f.write(outline)
                f.write('\n')