Example #1
0
def test_interleave():
    """Check interleave on equal-length inputs and on a length mismatch."""
    # (first list, second list, expected interleaved result)
    cases = (
        ([1, 1, 1], [2, 2, 2], [1, 2, 1, 2, 1, 2]),
        ([1], [2], [1, 2]),
        ([], [], []),
    )
    for left, right, expected in cases:
        assert interleave(left, right) == expected

    # Inputs of different lengths are expected to be rejected
    assert_raises(ValueError, interleave, [1], [])
Example #2
0
def test_interleave():
    """Exercise interleave with three-element, one-element and empty lists."""
    ones, twos = [1, 1, 1], [2, 2, 2]
    alternating = [1, 2, 1, 2, 1, 2]
    assert interleave(ones, twos) == alternating

    single_pair = interleave([1], [2])
    assert single_pair == [1, 2]

    nothing = interleave([], [])
    assert nothing == []

    # A length mismatch between the two inputs should raise ValueError
    assert_raises(ValueError, interleave, [1], [])
Example #3
0
def write_ped(pedigrees, pedfile, delim=" ", predicate=None, output_chromosomes=None):
    """
    write_ped writes data in a plink-format PED file.

    Each output line holds the six identifier columns (pedigree label,
    individual label, father label, mother label, sex code, phenotype code)
    followed by the individual's genotypes, with the two chromatids of each
    chromosome interleaved allele by allele.

    Arguments
    ------

    pedigrees: An object of class PedigreeCollection containing what you
        want to output
    pedfile: a string giving the name of the file to output to.
    delim: Field separator
    predicate: Which individuals to include in the output file. If not
        specified all are output. If the string is 'affected', only affected
        individuals are output. If the string is 'phenotyped', all individuals
        with phenotype information are output. Any other value of predicate
        must be a function to perform on the individual that evaluates to
        True/False for whether the individual should be output.
    output_chromosomes: If not None, a container of chromosome output
        labels; only chromosomes whose template `outputlabel` is in it are
        written. If None, every chromosome is written.

    Returns: Nothing

    Raises: ValueError if predicate is neither one of the recognized strings
        nor a callable.
    """

    # Check if we're only supposed to be outputting certain chromosomes
    checkchroms = output_chromosomes is not None

    if not predicate:
        predicate = lambda x: True
    elif predicate == "affected":
        predicate = lambda x: x.phenotypes["affected"] == 1
    elif predicate == "phenotyped":
        predicate = lambda x: x.phenotypes["affected"] in {0, 1}
    elif not callable(predicate):
        # The builtin callable() is used here: the collections.Callable alias
        # was removed in Python 3.10 and raises AttributeError there.
        raise ValueError("Not a valid predicate!")

    # Internal affection status -> PED phenotype column
    afflab = {1: "2", 0: "1", None: "-9"}

    with open(pedfile, "w") as f:
        for pedigree in pedigrees.pedigrees:
            for ind in pedigree.individuals:
                if not predicate(ind):
                    continue
                # Get the phenotype code
                aff = afflab[ind.phenotypes["affected"]]
                # Prepare the 6-column identifier; a missing parent is "0"
                # and sex 0 is written as 1, anything else as 2
                outline = [
                    pedigree.label,
                    ind.label,
                    ind.father.label if ind.father is not None else "0",
                    ind.mother.label if ind.mother is not None else "0",
                    1 if ind.sex == 0 else 2,
                    aff,
                ]
                # Make strings
                outline = list(map(str, outline))

                # Get the genotypes in the format we need them
                for template, chromatids in zip(ind.chromosomes, ind.genotypes):
                    if checkchroms and template.outputlabel not in output_chromosomes:
                        continue
                    chroma, chromb = chromatids
                    ga = chroma.astype(str).tolist()
                    gb = chromb.astype(str).tolist()
                    outline.extend(interleave(ga, gb))

                # Write it out
                f.write(delim.join(outline) + "\n")
Example #4
0
def write_ped(pedigrees,
              pedfile,
              delim=' ',
              predicate=None,
              output_chromosomes=None):
    """
    write_ped writes data in a plink-format PED file.

    Each output line holds the six identifier columns (pedigree label,
    individual label, father label, mother label, sex code, phenotype code)
    followed by the individual's genotypes, with the two chromatids of each
    chromosome interleaved allele by allele.

    :param pedigrees: An object of class PedigreeCollection containing what you
        want to output
    :param pedfile: a string giving the name of the file to output to.
    :param delim: Field separator
    :param predicate: Which individuals to include in the output file. If not
        specified all are output. If the string is 'affected', only affected
        individuals are output. If the string is 'phenotyped', all individuals
        with phenotype information are output. Any other value of predicate
        must be a function to perform on the individual that evaluates to
        True/False for whether the individual should be output.
    :param output_chromosomes: If not None, a container of chromosome output
        labels; only chromosomes whose template `outputlabel` is in it are
        written. If None, every chromosome is written.

    :raises ValueError: if predicate is neither one of the recognized strings
        nor a callable, or if a chromosome selected for output holds
        SparseAlleles data.

    Returns: Nothing
    """

    # Check if we're only supposed to be outputting certain chromosomes
    checkchroms = output_chromosomes is not None

    if not predicate:
        predicate = lambda x: True
    elif predicate == 'affected':
        predicate = lambda x: x.phenotypes['affected'] == 1
    elif predicate == 'phenotyped':
        predicate = lambda x: x.phenotypes['affected'] in {0, 1}
    elif not callable(predicate):
        # The builtin callable() is used here: the collections.Callable alias
        # was removed in Python 3.10 and raises AttributeError there.
        raise ValueError('Not a valid predicate!')

    # Internal affection status -> PED phenotype column
    pheno_label = {1: '2', 0: '1', None: '-9'}

    def getlab(ind, default):
        """
        Gets the label of an individual, or returns default if ind is None
        """
        return ind.label if ind is not None else default

    with smartopen(pedfile, 'w') as f:
        for pedigree in pedigrees.pedigrees:
            for ind in pedigree.individuals:
                if not predicate(ind):
                    continue

                # Prepare the 6-column identifier; a missing parent is '0'
                # and sex 0 is written as 1, anything else as 2
                outline = [
                    pedigree.label, ind.label,
                    getlab(ind.father, '0'),
                    getlab(ind.mother, '0'), 1 if ind.sex == 0 else 2,
                    pheno_label[ind.phenotypes['affected']]
                ]
                # Make strings
                outline = list(map(str, outline))

                # Get the genotypes in the format we need them
                for template, chromatids in zip(ind.chromosomes,
                                                ind.genotypes):
                    if checkchroms and template.outputlabel not in output_chromosomes:
                        continue
                    chroma, chromb = chromatids
                    if isinstance(chroma, SparseAlleles):
                        raise ValueError("Plink output not for Sparse Data")

                    ga = chroma.astype(str).tolist()
                    gb = chromb.astype(str).tolist()
                    outline.extend(interleave(ga, gb))

                # Write it out
                f.write(delim.join(outline) + '\n')
Example #5
0
def write_ped(pedigrees, pedfile, delim=' ', predicate=None,
              output_chromosomes=None):
    """
    write_ped writes data in a plink-format PED file.

    Each output line holds the six identifier columns (pedigree label,
    individual label, father label, mother label, sex code, phenotype code)
    followed by the individual's genotypes, with the two chromatids of each
    chromosome interleaved allele by allele.

    :param pedigrees: An object of class PedigreeCollection containing what you
        want to output
    :param pedfile: a string giving the name of the file to output to.
    :param delim: Field separator
    :param predicate: Which individuals to include in the output file. If not
        specified all are output. If the string is 'affected', only affected
        individuals are output. If the string is 'phenotyped', all individuals
        with phenotype information are output. Any other value of predicate
        must be a function to perform on the individual that evaluates to
        True/False for whether the individual should be output.
    :param output_chromosomes: If not None, a container of chromosome output
        labels; only chromosomes whose template `outputlabel` is in it are
        written. If None, every chromosome is written.

    :raises ValueError: if predicate is neither one of the recognized strings
        nor a callable, or if a chromosome selected for output holds
        SparseAlleles data.

    Returns: Nothing
    """

    # Check if we're only supposed to be outputting certain chromosomes
    checkchroms = output_chromosomes is not None

    if not predicate:
        predicate = lambda x: True
    elif predicate == 'affected':
        predicate = lambda x: x.phenotypes['affected'] == 1
    elif predicate == 'phenotyped':
        predicate = lambda x: x.phenotypes['affected'] in {0, 1}
    elif not callable(predicate):
        # The builtin callable() is used here: the collections.Callable alias
        # was removed in Python 3.10 and raises AttributeError there.
        raise ValueError('Not a valid predicate!')

    # Internal affection status -> PED phenotype column
    pheno_label = {1: '2', 0: '1', None: '-9'}

    def getlab(ind, default):
        """
        Gets the label of an individual, or returns default if ind is None
        """
        return ind.label if ind is not None else default

    with smartopen(pedfile, 'w') as f:
        for pedigree in pedigrees.pedigrees:
            for ind in pedigree.individuals:
                if not predicate(ind):
                    continue

                # Prepare the 6-column identifier; a missing parent is '0'
                # and sex 0 is written as 1, anything else as 2
                outline = [pedigree.label, ind.label,
                           getlab(ind.father, '0'),
                           getlab(ind.mother, '0'),
                           1 if ind.sex == 0 else 2,
                           pheno_label[ind.phenotypes['affected']]]
                # Make strings
                outline = list(map(str, outline))

                # Get the genotypes in the format we need them
                for template, chromatids in zip(ind.chromosomes, ind.genotypes):
                    if checkchroms and template.outputlabel not in output_chromosomes:
                        continue
                    chroma, chromb = chromatids
                    if isinstance(chroma, SparseAlleles):
                        raise ValueError("Plink output not for Sparse Data")

                    ga = chroma.astype(str).tolist()
                    gb = chromb.astype(str).tolist()
                    outline.extend(interleave(ga, gb))

                # Write it out
                f.write(delim.join(outline) + '\n')