def test_interleave():
    """Check interleave on equal-length inputs and its error on a mismatch."""
    # Each entry is ((first, second), expected result of interleave).
    cases = [
        (([1, 1, 1], [2, 2, 2]), [1, 2, 1, 2, 1, 2]),
        (([1], [2]), [1, 2]),
        (([], []), []),
    ]
    for (first, second), expected in cases:
        assert interleave(first, second) == expected

    # Inputs of different lengths are expected to be rejected.
    assert_raises(ValueError, interleave, [1], [])
def test_interleave():
    """Exercise interleave with three-, one- and zero-element inputs."""
    ones = [1, 1, 1]
    twos = [2, 2, 2]
    alternating = [1, 2, 1, 2, 1, 2]
    assert interleave(ones, twos) == alternating

    # Smallest non-empty case
    single_pair = interleave([1], [2])
    assert single_pair == [1, 2]

    # Two empty inputs give an empty result
    nothing = interleave([], [])
    assert nothing == []

    # A length mismatch must raise ValueError
    assert_raises(ValueError, interleave, [1], [])
def write_ped(pedigrees, pedfile, delim=" ", predicate=None, output_chromosomes=None):
    """
    write_ped writes data in a plink-format PED file.

    Arguments
    ------
    pedigrees: An object of class PedigreeCollection containing what you
        want to output
    pedfile: a string giving the name of the file to output to.
    delim: Field separator
    predicate: Which individuals to include in the output file. If not
        specified all are output. If the string is 'affected', only
        affected individuals are output. If the string is 'phenotyped',
        all individuals with phenotype information are output. Any other
        value of predicate must be a function to perform on the individual
        that evaluates to True/False for whether the individual should be
        output.
    output_chromosomes: If not None, only chromosomes whose `outputlabel`
        is contained in this object have their genotypes written.

    Returns: Nothing

    Raises: ValueError if predicate is neither one of the recognized
        strings nor a callable.
    """
    # Check if we're only supposed to be outputting certain chromosomes
    checkchroms = output_chromosomes is not None

    if not predicate:
        predicate = lambda x: True
    elif predicate == "affected":
        predicate = lambda x: x.phenotypes["affected"] == 1
    elif predicate == "phenotyped":
        predicate = lambda x: x.phenotypes["affected"] in {0, 1}
    elif not callable(predicate):
        # The builtin callable() is used because the collections.Callable
        # alias no longer exists on Python >= 3.10.
        raise ValueError("Not a valid predicate!")

    # Internal affection status -> code written in the phenotype column
    afflab = {1: "2", 0: "1", None: "-9"}

    with open(pedfile, "w") as f:
        for pedigree in pedigrees.pedigrees:
            for ind in pedigree.individuals:
                if not predicate(ind):
                    continue

                # Get the phenotype code
                aff = afflab[ind.phenotypes["affected"]]

                # Prepare the 6-column identifier. A missing parent is
                # written as "0"; sex is written as 1 when ind.sex == 0
                # and 2 otherwise.
                outline = [
                    pedigree.label,
                    ind.label,
                    ind.father.label if ind.father is not None else "0",
                    ind.mother.label if ind.mother is not None else "0",
                    1 if ind.sex == 0 else 2,
                    aff,
                ]
                # Make strings
                outline = [str(field) for field in outline]

                # Get the genotypes in the format we need them: for each
                # chromosome the two allele sequences are interleaved so
                # that both alleles of a marker sit next to each other.
                for template, chromatids in zip(ind.chromosomes, ind.genotypes):
                    if checkchroms and template.outputlabel not in output_chromosomes:
                        continue
                    chroma, chromb = chromatids
                    ga = chroma.astype(str).tolist()
                    gb = chromb.astype(str).tolist()
                    outline.extend(interleave(ga, gb))

                # Write it out
                f.write(delim.join(outline) + "\n")
def write_ped(pedigrees, pedfile, delim=' ', predicate=None,
              output_chromosomes=None):
    """
    write_ped writes data in a plink-format PED file.

    :param pedigrees: An object of class PedigreeCollection containing what
        you want to output
    :param pedfile: a string giving the name of the file to output to.
    :param delim: Field separator
    :param predicate: Which individuals to include in the output file. If
        not specified all are output. If the string is 'affected', only
        affected individuals are output. If the string is 'phenotyped', all
        individuals with phenotype information are output. Any other value
        of predicate must be a function to perform on the individual that
        evaluates to True/False for whether the individual should be output.
    :param output_chromosomes: If not None, only chromosomes whose
        `outputlabel` is contained in this object have genotypes written.

    :raises ValueError: if predicate is neither a recognized string nor a
        callable, or if an individual's genotypes are SparseAlleles.

    Returns: Nothing
    """
    # Check if we're only supposed to be outputting certain chromosomes
    checkchroms = output_chromosomes is not None

    if not predicate:
        predicate = lambda x: True
    elif predicate == 'affected':
        predicate = lambda x: x.phenotypes['affected'] == 1
    elif predicate == 'phenotyped':
        predicate = lambda x: x.phenotypes['affected'] in {0, 1}
    elif not callable(predicate):
        # The builtin callable() is used because the collections.Callable
        # alias no longer exists on Python >= 3.10.
        raise ValueError('Not a valid predicate!')

    # Internal affection status -> code written in the phenotype column
    pheno_label = {1: '2', 0: '1', None: '-9'}

    def getlab(ind, default):
        """
        Gets the label of an individual, or returns `default` when the
        individual is None
        """
        return ind.label if ind is not None else default

    with smartopen(pedfile, 'w') as f:
        for pedigree in pedigrees.pedigrees:
            for ind in pedigree.individuals:
                if not predicate(ind):
                    continue

                # Prepare the 6-column identifier. A missing parent is
                # written as '0'; sex is written as 1 when ind.sex == 0
                # and 2 otherwise.
                outline = [
                    pedigree.label,
                    ind.label,
                    getlab(ind.father, '0'),
                    getlab(ind.mother, '0'),
                    1 if ind.sex == 0 else 2,
                    pheno_label[ind.phenotypes['affected']],
                ]
                # Make strings
                outline = [str(field) for field in outline]

                # Get the genotypes in the format we need them: for each
                # chromosome the two allele sequences are interleaved so
                # that both alleles of a marker sit next to each other.
                for template, chromatids in zip(ind.chromosomes,
                                                ind.genotypes):
                    if (checkchroms and
                            template.outputlabel not in output_chromosomes):
                        continue

                    chroma, chromb = chromatids
                    if isinstance(chroma, SparseAlleles):
                        raise ValueError("Plink output not for Sparse Data")

                    ga = chroma.astype(str).tolist()
                    gb = chromb.astype(str).tolist()
                    outline.extend(interleave(ga, gb))

                # Write it out
                f.write(delim.join(outline) + '\n')
def write_ped(pedigrees, pedfile, delim=' ', predicate=None,
              output_chromosomes=None):
    """
    write_ped writes data in a plink-format PED file.

    :param pedigrees: An object of class PedigreeCollection containing what
        you want to output
    :param pedfile: a string giving the name of the file to output to.
    :param delim: Field separator
    :param predicate: Which individuals to include in the output file. If
        not specified all are output. If the string is 'affected', only
        affected individuals are output. If the string is 'phenotyped', all
        individuals with phenotype information are output. Any other value
        of predicate must be a function to perform on the individual that
        evaluates to True/False for whether the individual should be output.
    :param output_chromosomes: If not None, genotypes are written only for
        chromosomes whose `outputlabel` is contained in this object.

    :raises ValueError: if predicate is neither a recognized string nor a
        callable, or if an individual's genotypes are SparseAlleles.

    Returns: Nothing
    """
    # Check if we're only supposed to be outputting certain chromosomes
    checkchroms = output_chromosomes is not None

    if not predicate:
        predicate = lambda x: True
    elif predicate == 'affected':
        predicate = lambda x: x.phenotypes['affected'] == 1
    elif predicate == 'phenotyped':
        predicate = lambda x: x.phenotypes['affected'] in {0, 1}
    elif not callable(predicate):
        # collections.Callable was removed in Python 3.10; the builtin
        # callable() performs the same check on every supported version.
        raise ValueError('Not a valid predicate!')

    # Internal affection status -> code written in the phenotype column
    pheno_label = {1: '2', 0: '1', None: '-9'}

    def getlab(ind, default):
        """
        Gets the label of an individual, or returns `default` if the
        individual is None
        """
        return ind.label if ind is not None else default

    with smartopen(pedfile, 'w') as f:
        for pedigree in pedigrees.pedigrees:
            for ind in pedigree.individuals:
                if not predicate(ind):
                    continue

                # Prepare the 6-column identifier; absent parents are
                # written as '0', and sex is 1 when ind.sex == 0, else 2.
                identifier = [pedigree.label,
                              ind.label,
                              getlab(ind.father, '0'),
                              getlab(ind.mother, '0'),
                              1 if ind.sex == 0 else 2,
                              pheno_label[ind.phenotypes['affected']]]
                # Make strings
                outline = [str(field) for field in identifier]

                # Get the genotypes in the format we need them: the two
                # allele sequences of each chromosome are interleaved so
                # both alleles of a marker end up adjacent in the row.
                for template, chromatids in zip(ind.chromosomes,
                                                ind.genotypes):
                    if (checkchroms and
                            template.outputlabel not in output_chromosomes):
                        continue

                    chroma, chromb = chromatids
                    if isinstance(chroma, SparseAlleles):
                        raise ValueError("Plink output not for Sparse Data")

                    ga = chroma.astype(str).tolist()
                    gb = chromb.astype(str).tolist()
                    outline.extend(interleave(ga, gb))

                # Write it out
                f.write(delim.join(outline) + '\n')