Ejemplo n.º 1
0
 def callable(lines):
     """Lazily turn raw text lines into lists of fields.

     The helper ``is_empty`` and the settings ``sep``, ``strip_wspace``,
     ``by_column``, ``with_header``, ``converter``, ``ignore`` and
     ``limit`` are not defined here; they are looked up in the
     surrounding scope.

     For every line that ``is_empty`` does not reject, the newline
     characters at both ends are removed and the text is split on
     ``sep``.  When both ``strip_wspace`` and ``by_column`` are truthy,
     each field is additionally stripped of surrounding whitespace.

     If ``with_header`` is truthy, the first line that survives the
     emptiness check is yielded as-is (no conversion, no ``ignore``
     test) and does not count towards ``limit``.  Every later line is
     passed through ``converter`` (when truthy), dropped if ``ignore``
     returns a truthy value, and yielded otherwise.  Iteration stops
     once ``limit`` records have been yielded (``limit=None`` means no
     cap).
     """
     emitted = 0
     header_seen = False
     for raw in lines:
         if is_empty(raw):
             continue

         fields = raw.strip('\n').split(sep)
         if strip_wspace and by_column:
             fields = [cell.strip() for cell in fields]

         # The header row bypasses conversion, filtering and the limit.
         if with_header and not header_seen:
             header_seen = True
             yield fields
             continue

         record = converter(fields) if converter else fields
         if ignore(record):
             continue

         yield record

         # The limit is checked only after a record has been produced.
         emitted += 1
         if limit is not None and emitted >= limit:
             return
Ejemplo n.º 2
0
    def callable(lines):
        """Lazily turn raw text lines into lists of fields.

        The helper ``is_empty`` and the settings ``sep``,
        ``strip_wspace``, ``by_column``, ``with_header``, ``converter``,
        ``ignore`` and ``limit`` are not defined here; they are looked
        up in the surrounding scope.

        For every line that ``is_empty`` does not reject, the newline
        characters at both ends are removed and the text is split on
        ``sep``.  When both ``strip_wspace`` and ``by_column`` are
        truthy, each field is additionally stripped of surrounding
        whitespace.

        If ``with_header`` is truthy, the first line that survives the
        emptiness check is yielded as-is (no conversion, no ``ignore``
        test) and does not count towards ``limit``.  Every later line
        is passed through ``converter`` (when truthy), dropped if
        ``ignore`` returns a truthy value, and yielded otherwise.
        Iteration stops once ``limit`` records have been yielded
        (``limit=None`` means no cap).
        """
        emitted = 0
        header_seen = False
        for raw in lines:
            if is_empty(raw):
                continue

            fields = raw.strip('\n').split(sep)
            if strip_wspace and by_column:
                fields = [cell.strip() for cell in fields]

            # The header row bypasses conversion, filtering and the limit.
            if with_header and not header_seen:
                header_seen = True
                yield fields
                continue

            record = converter(fields) if converter else fields
            if ignore(record):
                continue

            yield record

            # The limit is checked only after a record has been produced.
            emitted += 1
            if limit is not None and emitted >= limit:
                return
Ejemplo n.º 3
0
def parse_otu_list(lines, precision=0.0049):
    """Parse the lines of a mothur *.list file.

    Each non-empty line is split on tabs into a distance label, an
    integer field (presumably the OTU count; it is checked but not
    returned), and one field per OTU holding comma-separated names.

    To ensure all distances are of type float, a label starting with
    "u" (the unique groups) is reported as a distance of 0.0.
    However, if some sequences are very similar, mothur may report a
    grouping at zero distance.  What mothur really means by this is
    that the clustering is at the level of mothur's precision, so any
    label that parses to numeric zero ("0.0", "0.00", "0", ...) is
    reported as ``precision`` instead.  This keeps it distinct from,
    and ordered after, the unique groups.

    If you are parsing OTUs with a non-default precision, you must
    specify the precision here to ensure that the parsed distances
    are in order.

    Args:
        lines: iterable of text lines from the list file.
        precision: distance reported for a zero-distance grouping.

    Yields:
        (distance, otu_list) tuples, where ``distance`` is a float and
        ``otu_list`` is a list of lists of names.

    Raises:
        ValueError: if the distance label is neither "u..." nor a
            number, or if the second field is not an integer.
        IndexError: if a line has fewer than two tab-separated fields.
    """
    for line in lines:
        if is_empty(line):
            continue
        tokens = line.strip().split('\t')

        distance_str = tokens.pop(0)
        if distance_str.lstrip().lower().startswith('u'):
            distance = 0.0
        else:
            distance = float(distance_str)
            # Compare numerically rather than against the literal '0.0'
            # so that every spelling of zero maps to the precision.
            if distance == 0.0:
                distance = float(precision)

        # The count itself is not needed, but parsing it still rejects
        # lines whose second field is missing or not an integer.
        int(tokens.pop(0))
        otu_list = [t.split(',') for t in tokens]

        yield (distance, otu_list)
Ejemplo n.º 4
0
def parse_otu_list(lines, precision=0.0049):
    """Parse the lines of a mothur *.list file.

    Each non-empty line is split on tabs into a distance label, an
    integer field (presumably the OTU count; it is checked but not
    returned), and one field per OTU holding comma-separated names.

    To ensure all distances are of type float, a label starting with
    "u" (the unique groups) is reported as a distance of 0.0.
    However, if some sequences are very similar, mothur may report a
    grouping at zero distance.  What mothur really means by this is
    that the clustering is at the level of mothur's precision, so any
    label that parses to numeric zero ("0.0", "0.00", "0", ...) is
    reported as ``precision`` instead.  This keeps it distinct from,
    and ordered after, the unique groups.

    If you are parsing OTUs with a non-default precision, you must
    specify the precision here to ensure that the parsed distances
    are in order.

    Args:
        lines: iterable of text lines from the list file.
        precision: distance reported for a zero-distance grouping.

    Yields:
        (distance, otu_list) tuples, where ``distance`` is a float and
        ``otu_list`` is a list of lists of names.

    Raises:
        ValueError: if the distance label is neither "u..." nor a
            number, or if the second field is not an integer.
        IndexError: if a line has fewer than two tab-separated fields.
    """
    for line in lines:
        if is_empty(line):
            continue
        tokens = line.strip().split('\t')

        distance_str = tokens.pop(0)
        if distance_str.lstrip().lower().startswith('u'):
            distance = 0.0
        else:
            distance = float(distance_str)
            # Compare numerically rather than against the literal '0.0'
            # so that every spelling of zero maps to the precision.
            if distance == 0.0:
                distance = float(precision)

        # The count itself is not needed, but parsing it still rejects
        # lines whose second field is missing or not an integer.
        int(tokens.pop(0))
        otu_list = [t.split(',') for t in tokens]

        yield (distance, otu_list)