Exemple #1
0
            x = i.split()
            taxon = x[1:]
            for t in taxon:
                j = taxon.index(t)
                Index[t] = j + 1
            continue

        elif match('Coefficients:', i):
            continue
        
        i.strip()
        x = i.split()    
        
        species = x[0]
    
        if species in sample.keys():
            duplicates.append(species)
        else:
            sample[species] = {}

        if missing == 'y':
            mtax = ''
            for t in taxon:
                if x[Index[t]] == '/':
                    sample[species][t] = mtax
                    
                else:
                    sample[species][t] = x[Index[t]]
                    mtax = x[Index[t]]
        else:
            for t in taxon:
Exemple #2
0
def _first_positions(names):
    """Map each name to the 0-based position of its first occurrence.

    Gives the same answer as ``names.index(name)`` for every element, but
    in a single pass over the list.
    """
    positions = {}
    for position, name in enumerate(names):
        positions.setdefault(name, position)
    return positions


def phy_re_analysis(options, args):
    """
    Run the PhyRe analysis for one sample file (or a batch of them).

    Script should be launched as:

    python PhyRe.py [samplefile] [masterlistfile] s1 s2 [options]

    Parameters
    ----------
    options : dict
        Optional settings; every key below must be present, a falsy value
        selects the default.

        'm' : passed to ``process_population_file_line`` as the
              missing-data flag (default 'n')
        'o' : base name of the output files (default: the text of
              ``samplefile`` before its first '.')
        'p' : permutations for confidence intervals (default 1000)
        'c' : 'y' to produce the funnel data (default 'y')
        'b' : 'y' if ``samplefile`` is a list of sample files, one per
              line (default 'n')
        'l' : 'y' to use the coefficients given in the population file,
              'n' to compute them with ``PathLength`` (default 'n')
        's' : truthy to return the results as strings instead of writing
              them to disk
    args : dict
        'samplefile' : path of the sample file (or of the batch list)
        'popfile' : path of the population master list
        'd1', 'd2' : range for number of species for funnel plot

    Returns
    -------
    tuple of (str, str) or None
        When ``options['s']`` is truthy, ``(results, funnel_data)`` is
        returned; ``funnel_data`` is '' when the funnel was not requested.
        Otherwise nothing is returned and two files are written:
        ``<out>.out`` with the results from the analyses of the sample and
        ``<prefix>_funnel.out`` with the results from random subsamples of
        the master list, where ``<prefix>`` is ``out`` up to its first '_'.

    Raises
    ------
    ValueError
        If ``options['l']`` is neither 'y' nor 'n', or if batch mode is
        selected and the batch list names no sample file.

    Notes
    -----
    Described in Phylogenetic representativeness: a new
    method for evaluating taxon sampling in evolutionary studies [1]

    References
    ----------
    .. [1] http://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-11-209
    .. [2] http://www.mozoolab.net/downloads/manual.pdf
    """

    samplefile = args['samplefile']
    popfile = args['popfile']
    d1 = args['d1']
    d2 = args['d2']

    # A falsy option value means "use the default".
    missing = options['m'] or 'n'
    out = options['o'] or samplefile.split('.')[0]
    p = options['p'] or 1000
    ci = options['c'] or 'y'
    batch = options['b'] or 'n'
    pathlengths = options['l'] or 'n'
    output_as_string = bool(options['s'])

    # Population - dictionary with population file information
    population = defaultdict()

    Index = {}
    Taxon = defaultdict()
    coef = {}
    taxon = []
    pathLengths = defaultdict()

    if batch == 'y':
        # In batch mode the sample file is only a list of file names.
        with open(samplefile) as fp:
            Files = [line.strip() for line in fp]
    else:
        Files = [samplefile]

    duplicates = []

    with open(popfile) as fp:
        population_file_entries = fp.readlines()

    # Only the first two lines may carry header information ("Taxon:" and
    # "Coefficients:"). Header lines are consumed here; every other line is
    # kept as a species entry.
    data_entries = []

    for line in population_file_entries[:2]:
        if match('Taxon:', line):
            names = line.split()
            names.remove('Taxon:')
            taxon.extend(names)

            # Index gives the column of each taxon level in a split entry
            # line (column 0 is the species name, hence the +1).
            for name, position in _first_positions(names).items():
                Index[name] = position + 1

        elif match('Coefficients:', line):
            fields = line.split()
            fields.remove('Coefficients:')
            # NOTE(security): eval() executes whatever expression the
            # population file contains - only feed this trusted files.
            # list() keeps the values subscriptable on Python 3 as well.
            values = list(map(eval, fields))

            for name, level in _first_positions(taxon).items():
                coef[name] = sum(values[level:])
                pathLengths[name] = values[level]

        else:
            data_entries.append(line)

    data_entries.extend(population_file_entries[2:])

    # Build the population from the remaining entries.
    for entry in data_entries:
        species_name, species_taxonomy = \
            process_population_file_line(entry, taxon, Index, missing)

        if species_name in population:
            duplicates.append(species_name)
        else:
            population[species_name] = species_taxonomy

    if duplicates:
        print("Population master list contains %s duplicates"
              % len(duplicates))

    if pathlengths == 'n':
        coef, popN, pathLengths = PathLength(population, taxon, Taxon)
    elif pathlengths == 'y':
        # Keep the coefficients read from the population file; only the
        # population count is taken from PathLength.
        _unused_coef, popN, _unused_lengths = \
            PathLength(population, taxon, Taxon)
    else:
        raise ValueError(
            "Option 'l' must be 'y' or 'n', got %r" % (pathlengths,))

    print("Finished path length calculation")

    if not Files:
        raise ValueError(
            "Batch file %s does not list any sample file" % samplefile)

    results = {}

    # Open every sample file (or the single sample file) and analyse it.
    for sample_path in Files:
        sample = get_sample_subset_from_sample_file(sample_path, population)
        key = sample_path.split('.')[0]

        samp = list(sample)

        atd, taxonN, Taxon = ATDmean(sample, samp, taxon, coef)
        average_taxonomic_distinctness_variance = \
            ATDvariance(taxonN, samp, atd, taxon, coef)
        euler_results = euler(sample, atd, taxonN, taxon, Taxon, coef)

        results[key] = {
            'atd': atd,
            'vtd': average_taxonomic_distinctness_variance,
            'euler': euler_results,
            'N': taxonN,
            'n': len(sample),
            'taxon': Taxon,
        }

    # taxonN is the value computed for the last sample file.
    phy_re_result = printResults(taxon, taxonN, popN, pathLengths, results)

    funnel_data = ''
    if ci == 'y':
        funnel_data = print_funnel_data(p, d1, d2, population, taxon, coef)

    if output_as_string:
        return phy_re_result, funnel_data

    with open(out + '.out', 'w') as fp:
        fp.write(phy_re_result)
    with open(out.split('_')[0] + '_funnel.out', 'w') as fp:
        fp.write(funnel_data)
Exemple #3
0
            x = i.split()
            taxon = x[1:]
            for t in taxon:
                j = taxon.index(t)
                Index[t] = j + 1
            continue

        elif match('Coefficients:', i):
            continue

        i.strip()
        x = i.split()

        species = x[0]

        if species in sample.keys():
            duplicates.append(species)
        else:
            sample[species] = {}

        if missing == 'y':
            mtax = ''
            for t in taxon:
                if x[Index[t]] == '/':
                    sample[species][t] = mtax

                else:
                    sample[species][t] = x[Index[t]]
                    mtax = x[Index[t]]
        else:
            for t in taxon: