예제 #1
0
def check_tiling(reads, breaks, contig_len, debug=False):
    """Checks if there are reads tiling across breakpoints with no gaps.

    This is used for checking the integrity of a breakpoint where there is a
    novel sequence of considerable size and there is not enough flanking
    sequence for read pairs to suggest validity of the fragment.

    Args:
        reads:      (list) Pysam AlignedRead objects
        breaks:     (tuple) sorted coordinates of breakpoint positions in contig
        contig_len: (int) contig length, passed to is_fully_mapped()
        debug:      (bool) unused here; kept for interface compatibility
    Returns:
        True if reads span across the breakpoints with no gaps, else False
    """
    span = None
    for read in reads:
        # skip reads that are unmapped or not fully mapped to the contig
        if not read.alen or not is_fully_mapped(read, contig_len):
            continue

        # skip reads that don't overlap the breakpoints
        if read.pos + read.alen < breaks[0] or read.pos > breaks[1]:
            continue

        # accumulate the union of 1-based read spans; explicit None check
        # replaces the original bare except around span.union()
        read_span = intspan('%d-%d' % (read.pos + 1, read.pos + read.alen))
        span = read_span if span is None else span.union(read_span)

    if span is not None:
        break_span = intspan('%d-%d' % (breaks[0], breaks[1]))
        # a single contiguous range that fully contains the breakpoint span
        # means the reads tile the breakpoint with no gaps
        if len(span.ranges()) == 1 and len(span & break_span) == len(break_span):
            return True

    return False
예제 #2
0
파일: bwa_mem.py 프로젝트: bcgsc/pavfinder
def find_untemplated_sequence(aligns, contig_seq):
    """Finds untemplated sequence at a chimeric breakpoint.

    Untemplated sequence is any contig sequence at the breakpoint that neither
    of the 2 chimeric alignments covers.  The sequence is reported on the same
    strand as the first alignment (both alignments are expected to share a
    strand).

    Args:
        aligns: (list) 2 Alignment objects of chimera
        contig_seq: (str) Contig sequence
    Returns:
        Untemplated sequence (str), or '-' when every base is covered
    """
    untemplated_seq = '-'

    span_a = intspan('%s-%s' % (aligns[0].qstart, aligns[0].qend))
    span_b = intspan('%s-%s' % (aligns[1].qstart, aligns[1].qend))
    endpoints = sorted([aligns[0].qstart, aligns[0].qend,
                        aligns[1].qstart, aligns[1].qend])
    whole_span = intspan('%s-%s' % (min(endpoints), max(endpoints)))
    uncovered = whole_span - span_a - span_b

    if len(uncovered) > 0:
        start, end = uncovered.ranges()[0]
        untemplated_seq = contig_seq[start - 1:end]
        # report the sequence relative to the strand of the first alignment
        if aligns[0].strand == '-':
            untemplated_seq = reverse_complement(untemplated_seq)

    return untemplated_seq
예제 #3
0
def find_untemplated_sequence(aligns, contig_seq):
    """Finds untemplated sequence at a chimeric breakpoint.

    Any contig sequence at the breakpoint not covered by either of the two
    alignments of the chimera counts as untemplated.  The result is reported
    on the strand of the first alignment (both alignments are expected to
    share a strand).

    Args:
        aligns: (list) 2 Alignment objects of chimera
        contig_seq: (str) Contig sequence
    Returns:
        Untemplated sequence (str), or '-' when nothing is uncovered
    """
    coords = sorted([aligns[0].qstart, aligns[0].qend,
                     aligns[1].qstart, aligns[1].qend])
    covered1 = intspan('%s-%s' % (aligns[0].qstart, aligns[0].qend))
    covered2 = intspan('%s-%s' % (aligns[1].qstart, aligns[1].qend))
    full = intspan('%s-%s' % (min(coords), max(coords)))

    gap = full - covered1 - covered2
    if len(gap) == 0:
        return '-'

    first_range = gap.ranges()[0]
    seq = contig_seq[first_range[0] - 1:first_range[1]]
    # sequence given in relation to strand of first alignment
    if aligns[0].strand == '-':
        seq = reverse_complement(seq)
    return seq
예제 #4
0
	def create_span(blocks):
	    """Creates an intspan covering the union of all blocks.

	    Args:
	        blocks: iterable of (start, end) tuples or lists
	    Returns:
	        intspan union of all well-formed blocks, or None if none found
	    """
	    # indentation normalized: the original mixed tab depths with
	    # tab+space depths, which is a TabError under Python 3
	    span = None
	    for block in blocks:
	        # only accept well-formed 2-element tuple/list blocks
	        if (type(block) is tuple or type(block) is list) and len(block) == 2:
	            if span is None:
	                span = intspan('%s-%s' % (block[0], block[1]))
	            else:
	                span = span.union(intspan('%s-%s' % (block[0], block[1])))
	    return span
예제 #5
0
	def create_span(blocks):
	    """Creates an intspan covering the union of all blocks.

	    Args:
	        blocks: iterable of (start, end) tuples or lists
	    Returns:
	        intspan union of all well-formed blocks, or None if none found
	    """
	    # indentation normalized: the original mixed tab depths with
	    # tab+space depths, which is a TabError under Python 3
	    span = None
	    for block in blocks:
	        # only accept well-formed 2-element tuple/list blocks
	        if (type(block) is tuple or type(block) is list) and len(block) == 2:
	            if span is None:
	                span = intspan('%s-%s' % (block[0], block[1]))
	            else:
	                span = span.union(intspan('%s-%s' % (block[0], block[1])))
	    return span
예제 #6
0
파일: mapping.py 프로젝트: dmacmillan/CLEAT
 def create_span(cls, blocks):
     """Creates an intspan covering the union of all blocks.

     Used by self.overlap()

     Args:
         blocks: iterable of (start, end) pairs
     Returns:
         intspan union of the blocks, or None when blocks is empty
     """
     span = None
     for block in blocks:
         try:
             span = span.union(intspan('%s-%s' % (block[0], block[1])))
         except AttributeError:
             # span is still None on the first iteration; start it here.
             # Narrowed from a bare except, which also swallowed parse
             # errors and malformed-block IndexErrors.
             span = intspan('%s-%s' % (block[0], block[1]))

     return span
예제 #7
0
 def create_span(blocks):
     """Return the intspan union of all well-formed 2-element blocks."""
     span = None
     for block in blocks:
         # skip anything that is not a (start, end) tuple or list
         if isinstance(block, (tuple, list)) and len(block) == 2:
             piece = intspan('{}-{}'.format(block[0], block[1]))
             span = piece if span is None else span.union(piece)
     return span
예제 #8
0
def preprocess_variant(q_i):
    """
    Preprocess question's variant (if necessary)

    :param q_i: the Q_info object, whose fields are
                'id,kind,subkind,nb_source,options,order'
    :type q_i: Q_info (named tuple)
    """
    if q_i.id != 'order_of_operations':
        return
    default_variant = {
        'order_of_operations': {'variant': '0-23,100-87'}
    }
    # fall back to the default variant when none is set (missing or empty)
    if q_i.options.get('variant', '') == '':
        q_i.options.update(default_variant[q_i.id])
    try:
        variants_to_pick_from = intspan(q_i.options['variant'])
    except ParseError:
        raise ValueError('Incorrect variant in xml file: {}'
                         .format(q_i.options['variant']))
    # build an OR-joined filter matching any of the allowed variant ranges
    clauses = []
    for lo, hi in variants_to_pick_from.ranges():
        if lo == hi:
            clauses.append('nb1 = ' + str(lo))
        else:
            clauses.append('(nb1 >= {} AND nb1 <= {})'.format(lo, hi))
    raw_query = '(' + ' OR '.join(clauses) + ')'
    q_i.options.update(
        {'variant':
         int(shared
             .order_of_operations_variants_source
             .next(**{'raw': raw_query})[0])})
예제 #9
0
def locate_features(breaks, orients, features):
    """Find the 'best' gene features of the breakpoints of a given event.

    It will first determine which features overlap which breakpoint.
    If there are features that overlap both breakpoints, only those will be
    considered in picking the 'best' suitable one.

    Args:
        breaks: (tuple) the 2 breakpoints ((chr1, pos1), (chr2, pos2))
        orients: (tuple) the 2 orientations ('L|R', 'L|R')
        features: (list) Interval objects of a given event from parsing the
            bedpe overlap file

    Returns:
        A tuple of the 2 features (Interval objects) picked to annotate the
        2 breakpoints; can be (None, None) if nothing is found
    """
    # use intspan to intersect each breakpoint with feature coordinates
    break1_span = intspan('%s-%s' % (breaks[0][1], breaks[0][1]))
    break2_span = intspan('%s-%s' % (breaks[1][1], breaks[1][1]))

    # categorize features by which breakpoint(s) they overlap; builtin set
    # (replacing Set() from the Python-2-only 'sets' module) deduplicates
    overlaps = {'both': set(), '1': set(), '2': set()}

    for feature in features:
        # feature.start is presumably 0-based (BED-style) — hence the +1
        feature_span = intspan('%s-%s' % (feature.start + 1, feature.stop))

        overlap1 = (feature.chrom == breaks[0][0]
                    and bool(feature_span & break1_span))
        overlap2 = (feature.chrom == breaks[1][0]
                    and bool(feature_span & break2_span))
        if overlap1 and overlap2:
            overlaps['both'].add(feature)
        elif overlap1:
            overlaps['1'].add(feature)
        elif overlap2:
            overlaps['2'].add(feature)

    # prefer features that overlap both breakpoints when any exist
    if overlaps['both']:
        best_feature1 = pick_feature(breaks[0], orients[0], overlaps['both'])
        best_feature2 = pick_feature(breaks[1], orients[1], overlaps['both'])
    else:
        best_feature1 = pick_feature(breaks[0], orients[0], overlaps['1'])
        best_feature2 = pick_feature(breaks[1], orients[1], overlaps['2'])

    return best_feature1, best_feature2
예제 #10
0
def get_contig_coverage(aligns, end_to_end=False):
    """Coverage of the contig by the union of the alignments.

    Args:
        aligns: (list) all Alignment objects constituting a chimera
        end_to_end: (bool) if True, measure from the leftmost to the rightmost
            aligned query base instead of the union of aligned bases

    Returns:
        Fraction (float) of the contig covered
    """
    # indentation normalized: the original mixed 4-space and tab indents,
    # which is a TabError under Python 3
    span = intspan('%d-%d' % (aligns[0].qstart, aligns[0].qend))
    for i in range(1, len(aligns)):
        span = span.union(intspan('%d-%d' % (aligns[i].qstart, aligns[i].qend)))

    if not end_to_end:
        return len(span) / float(aligns[0].query_len)
    else:
        return (max(span) - min(span) + 1) / float(aligns[0].query_len)
예제 #11
0
파일: chimera.py 프로젝트: bcgsc/pavfinder
def check_inv_dup(adj, aligns):
    """Reclassify an 'inv' adjacency as 'inv-dup' when warranted.

    When the second alignment's target span compares lower than the first's,
    relabel the adjacency 'inv-dup' and flip the second breakpoint and its
    orientation so the call reads like a duplication.

    Args:
        adj: Adjacency object (mutated in place)
        aligns: (list) 2 Alignment objects of the chimera
    """
    # indentation normalized: original body was tab-indented under a
    # space-indented def, a TabError under Python 3
    if adj.rearrangement == 'inv':
        target_span_before_bp = intspan('%s-%s' % (aligns[0].tstart, aligns[0].tend))
        target_span_after_bp = intspan('%s-%s' % (aligns[1].tstart, aligns[1].tend))
        if target_span_after_bp < target_span_before_bp:
            adj.rearrangement = 'inv-dup'

            # reverse breakpoint and orientation to make it same as a dup
            if adj.target_breaks[1] == aligns[1].tstart:
                adj.target_breaks[1] = aligns[1].tend
            else:
                adj.target_breaks[1] = aligns[1].tstart

            if adj.orients[1] == 'L':
                adj.orients[1] = 'R'
            else:
                adj.orients[1] = 'L'
예제 #12
0
def remove_rows(table_name, id_span):
    """Remove rows matching the ids from id_span from the table."""
    _assert_table_exists(table_name)
    # validate every id up front so we fail before touching the table
    for row_id in list(intspan(id_span)):
        _assert_row_exists(table_name, row_id)
    id_list = _intspan2sqllist(id_span)
    _exec(table_name, f'DELETE FROM {table_name} WHERE id IN {id_list};')
    _reset_table_ids(table_name)
예제 #13
0
파일: annotate.py 프로젝트: bcgsc/pavfinder
def locate_features(breaks, orients, features):
    """Find the 'best' gene features of the breakpoints of a given event.

    It will first determine which features overlap which breakpoint.
    If there are features that overlap both breakpoints, only those will be
    considered in picking the 'best' suitable one.

    Args:
        breaks: (tuple) the 2 breakpoints ((chr1, pos1), (chr2, pos2))
        orients: (tuple) the 2 orientations ('L|R', 'L|R')
        features: (list) Interval objects of a given event from parsing the
            bedpe overlap file

    Returns:
        A tuple of the 2 features (Interval objects) picked to annotate the
        2 breakpoints; can be (None, None) if nothing is found
    """
    # use intspan to intersect each breakpoint with feature coordinates
    break1_span = intspan('%s-%s' % (breaks[0][1], breaks[0][1]))
    break2_span = intspan('%s-%s' % (breaks[1][1], breaks[1][1]))

    # categorize features by which breakpoint(s) they overlap; builtin set
    # (replacing Set() from the Python-2-only 'sets' module) deduplicates
    overlaps = {'both': set(), '1': set(), '2': set()}

    for feature in features:
        # feature.start is presumably 0-based (BED-style) — hence the +1
        feature_span = intspan('%s-%s' % (feature.start + 1, feature.stop))

        overlap1 = (feature.chrom == breaks[0][0]
                    and bool(feature_span & break1_span))
        overlap2 = (feature.chrom == breaks[1][0]
                    and bool(feature_span & break2_span))
        if overlap1 and overlap2:
            overlaps['both'].add(feature)
        elif overlap1:
            overlaps['1'].add(feature)
        elif overlap2:
            overlaps['2'].add(feature)

    # prefer features that overlap both breakpoints when any exist
    if overlaps['both']:
        best_feature1 = pick_feature(breaks[0], orients[0], overlaps['both'])
        best_feature2 = pick_feature(breaks[1], orients[1], overlaps['both'])
    else:
        best_feature1 = pick_feature(breaks[0], orients[0], overlaps['1'])
        best_feature2 = pick_feature(breaks[1], orients[1], overlaps['2'])

    return best_feature1, best_feature2
예제 #14
0
파일: util.py 프로젝트: jwodder/pyrepo
def update_years2str(year_str: str, years: Optional[List[int]] = None) -> str:
    """
    Given a string of years of the form ``"2014, 2016-2017"``, update the
    string if necessary to include the given years (default: the current year).
    """
    target_years = [time.localtime().tm_year] if years is None else years
    span = intspan(year_str)
    span.update(target_years)
    return years2str(span)
예제 #15
0
def find_columns(lines):
    """
    Given a list of text lines, assume they represent a fixed-width
    "ASCII table" and guess the column indices therein. Depends on
    finding typographical "rivers" of spaces running vertically
    through text indicating column breaks.

    This is a high-probability heuristic (based on the many tests performed on
    it). There are some cases where all rows happen to include aligned spaces
    that do *not* signify a column break. In this case, recommend you modify
    the table with a separator line (e.g. using --- characters) showing where
    the columns should be. Since separators are stripped out, adding an
    explicit set of separators will not alter result data.

    Returns a (seps, nonseps, hranges) tuple where hranges is a list of
    half-open (start, end) column ranges.
    """
    # Partition lines into seps (separators and blank lines) and nonseps (content)
    nonseps, seps = partition(is_separator, lines)

    # Find max length of content lines. This defines the "universe" of
    # available content columns. Use only non-separator lines because they
    # are the content we care most about.
    maxlen = max(len(l) for l in nonseps)
    universe = intspan.from_range(0, maxlen - 1)

    # If there are separators lines, try to find definitive vertical separation
    # markers in them to define column boundaries.
    if seps:
        # If separators, try to find the column breaks in them
        indices = col_break_indices(seps)
        # content columns = everything that is not a break index
        iranges = (universe - indices).ranges()
    else:
        indices = None


    if not seps or not indices:
        # If horizontal separators not present, or if present but lack the vertical
        # separation indicators needed to determine column locations, look for
        # vertical separators common to all rows. A rare, but genuine case.
        indices = col_break_indices(nonseps, 'intersection_update')
        if not indices:
            # Vertical separators not found. Fall back to using vertical
            # whitespace rivers as column separators. Find where spaces are in
            # every column.
            indices = intspan.from_range(0, maxlen - 1)

            # intersect the space positions of every line (seps included)
            for l in lines:
                line_spaces = intspan(all_indices(l, ' '))
                indices.intersection_update(line_spaces)

        # indices is now intspan showing where spaces or vertical seps are
        # Find inclusive ranges where content would be
        iranges = (universe - indices).ranges()

    # Convert inclusive ranges to half-open Python ranges
    hranges = [(s, e+1) for s,e in iranges]
    return seps, nonseps, hranges
예제 #16
0
def check_inv_dup(adj, aligns):
    """Flag an inversion that behaves like a duplication.

    When the adjacency is an 'inv' and the second alignment's target span
    compares lower than the first's, relabel it 'inv-dup' and flip the second
    breakpoint and its orientation so the call matches a dup.
    """
    if adj.rearrangement != 'inv':
        return

    span_before = intspan('%s-%s' % (aligns[0].tstart, aligns[0].tend))
    span_after = intspan('%s-%s' % (aligns[1].tstart, aligns[1].tend))
    if not (span_after < span_before):
        return

    adj.rearrangement = 'inv-dup'

    # reverse breakpoint and orientation to make it the same as a dup
    second = aligns[1]
    if adj.target_breaks[1] == second.tstart:
        adj.target_breaks[1] = second.tend
    else:
        adj.target_breaks[1] = second.tstart

    adj.orients[1] = 'R' if adj.orients[1] == 'L' else 'L'
예제 #17
0
def col_break_indices(lines, combine='update'):
    """
    Given a set of horizontal separator lines, return a guess as to which
    indices have column breaks, based on common indicator characters.

    combine names the intspan method used to merge per-line indices
    ('update' for union, 'intersection_update' for intersection).
    """
    per_line = [vertical_sep_in_line(line) for line in lines]
    combined = intspan(per_line[0])

    merge = getattr(combined, combine)
    for indices in per_line[1:]:
        merge(indices)
    return combined
예제 #18
0
파일: chimera.py 프로젝트: bcgsc/pavfinder
    def _coverage(path):
        """Return (covered, overlaps) for the alignments along path.

        covered is the intspan union of the query spans of the alignments in
        path; overlaps lists the intspan intersection of each consecutive
        pair of spans (only non-empty ones).
        Note: 'aligns' is a closure variable from the enclosing scope.
        """
        # indentation normalized: original body was tab-indented under a
        # space-indented def, a TabError under Python 3
        spans = [intspan('%d-%d' % (aligns[i].qstart, aligns[i].qend)) for i in path]
        covered = spans[0]
        overlaps = []
        for i in range(1, len(spans)):
            covered = covered.union(spans[i])

            # record overlap between consecutive alignments
            overlap = spans[i - 1].intersection(spans[i])
            if len(overlap) > 0:
                overlaps.append(overlap)

        return covered, overlaps
예제 #19
0
    def _coverage(path):
        """Return (covered, overlaps) for the alignments along path.

        covered is the intspan union of the query spans of the alignments in
        path; overlaps lists the intspan intersection of each consecutive
        pair of spans (only non-empty ones).
        Note: 'aligns' is a closure variable from the enclosing scope.
        """
        # indentation normalized: original body was tab-indented under a
        # space-indented def, a TabError under Python 3
        spans = [intspan('%d-%d' % (aligns[i].qstart, aligns[i].qend)) for i in path]
        covered = spans[0]
        overlaps = []
        for i in range(1, len(spans)):
            covered = covered.union(spans[i])

            # record overlap between consecutive alignments
            overlap = spans[i - 1].intersection(spans[i])
            if len(overlap) > 0:
                overlaps.append(overlap)

        return covered, overlaps
예제 #20
0
def check_tiling(reads, breaks, contig_len, debug=False):
    """Checks if there are reads tiling across breakpoints with no gaps.

    Used for checking the integrity of a breakpoint where there is a novel
    sequence of considerable size and there is not enough flanking sequence
    for read pairs to suggest validity of the fragment.

    Args:
        reads:  (list) Pysam AlignedRead objects
        breaks: (tuple) sorted coordinates of breakpoint positions in contigs
    Returns:
        Boolean if there are reads spanning across breakpoints with no gaps
    """
    span = None
    for read in reads:
        # ignore unmapped reads and reads not fully mapped to the contig
        if not read.alen or not is_fully_mapped(read, contig_len):
            continue

        # ignore reads that do not touch the breakpoint interval
        if read.pos + read.alen < breaks[0] or read.pos > breaks[1]:
            continue

        read_span = intspan('{}-{}'.format(read.pos + 1, read.pos + read.alen))
        try:
            span = span.union(read_span)
        except BaseException:
            # first qualifying read: span is still None
            span = read_span

    if span is None:
        return False

    break_span = intspan('{}-{}'.format(breaks[0], breaks[1]))
    # one contiguous run that covers the whole breakpoint span means the
    # reads tile the breakpoint with no gaps
    return (len(span.ranges()) == 1
            and len(span & break_span) == len(break_span))
예제 #21
0
파일: groups.py 프로젝트: sqw23/pynab
def scan_missing_segments(group_name):
    """Scan for previously missed segments.

    Deletes misses that have exceeded the retry limit, rescans the server for
    the remaining missed message ids of this group, saves any parts retrieved,
    and re-records segments that are still missing.
    """

    log.info('missing: checking for missed segments')

    with db_session() as db:
        # recheck for anything to delete
        # (misses that have hit the configured retry limit are given up on)
        expired = db.query(Miss).filter(
            Miss.attempts >= config.scan.get('miss_retry_limit')).filter(
                Miss.group_name == group_name).delete()
        db.commit()
        if expired:
            log.info('missing: deleted {} expired misses'.format(expired))

        # get missing articles for this group
        missing_messages = [
            r for r, in db.query(Miss.message).filter(
                Miss.group_name == group_name).all()
        ]

        if missing_messages:
            # mash it into ranges
            missing_ranges = intspan(missing_messages).ranges()

            server = Server()
            server.connect()

            status, parts, messages, missed = server.scan(
                group_name, message_ranges=missing_ranges)

            # if we got some missing parts, save them
            if parts:
                pynab.parts.save_all(parts)

            # even if they got blacklisted, delete the ones we got from the misses
            if messages:
                db.query(Miss).filter(Miss.message.in_(messages)).filter(
                    Miss.group_name == group_name).delete(False)

            db.commit()

            if missed:
                # clear up those we didn't get
                save_missing_segments(group_name, missed)

            if server.connection:
                # best-effort disconnect; errors on quit are deliberately ignored
                try:
                    server.connection.quit()
                except:
                    pass
예제 #22
0
파일: bwa_mem.py 프로젝트: bcgsc/pavfinder
def find_microhomology(aligns, contig_seq):
    """Finds microhomology given 2 alignments and the contig sequence.

    The homology sequence is taken from the contig sequence.  BWA-mem reports
    overlapping contig coordinates in chimeric alignments, so any overlap of
    the two query spans is the microhomology.

    Args:
        aligns: (list) 2 Alignment objects of chimera
        contig_seq: (str) Contig sequence
    Returns:
        Tuple of (homology sequence (str) or None,
                  homology contig coordinates (int, int) or None)
    """
    homol_seq = None
    homol_coords = None

    span1 = intspan('%s-%s' % (aligns[0].qstart, aligns[0].qend))
    span2 = intspan('%s-%s' % (aligns[1].qstart, aligns[1].qend))
    common = span1.intersection(span2)
    if len(common) > 0:
        homol_coords = common.ranges()[0]
        start, end = homol_coords
        homol_seq = contig_seq[start - 1:end]

    return homol_seq, homol_coords
예제 #23
0
def find_microhomology(aligns, contig_seq):
    """Finds microhomology given 2 alignments and the contig sequence.

    Exploits the fact that BWA-mem reports overlapping contig coordinates in
    chimeric alignments: the intersection of the two query spans is the
    homology, and its sequence is taken from the contig.

    Args:
        aligns: (list) 2 Alignment objects of chimera
        contig_seq: (str) Contig sequence
    Returns:
        Tuple of (homology sequence (str) or None,
                  homology contig coordinates (int, int) or None)
    """
    first_span = intspan('%s-%s' % (aligns[0].qstart, aligns[0].qend))
    second_span = intspan('%s-%s' % (aligns[1].qstart, aligns[1].qend))
    shared_span = first_span.intersection(second_span)

    if len(shared_span) == 0:
        return None, None

    coords = shared_span.ranges()[0]
    # coordinates are 1-based inclusive; slice accordingly
    return contig_seq[coords[0] - 1:coords[1]], coords
예제 #24
0
def derangify(nodes):
    """Expand node names containing a bracketed numeric range.

    E.g. 'web[1-3].example.com' expands to web1/web2/web3 entries; numbers are
    zero-padded to the width of the range's upper bound.  Nodes without a
    bracketed range pass through unchanged.

    Args:
        nodes: iterable of node-name strings
    Returns:
        list of expanded node names
    """
    data_array = []
    # raw strings fix the invalid escape sequences ('\[', '\.') that are
    # DeprecationWarnings (and future errors) in non-raw literals
    node_re = re.compile(r'(.*)\[(.*)\](\..*)')
    range_re = re.compile(r'.*\-.*')
    for node in nodes:
        match = node_re.match(node)
        if match:
            (prefix, nrange, suffix) = match.groups()
            length = 0
            if range_re.match(nrange):
                (r1, r2) = nrange.split('-')
                # pad to the width of the upper bound, e.g. 100-300 -> 3
                length = len(str(r2))
            for num in intspan(nrange):
                data_array.append("%s%s%s" % (prefix, str(num).zfill(length), suffix))
        else:
            data_array.append(node)

    return data_array
예제 #25
0
파일: derange.py 프로젝트: prpllrhd/test123
def derangify(nodes):
    """Expand node names containing a bracketed numeric range.

    E.g. 'web[1-3].example.com' expands to web1/web2/web3 entries; numbers are
    zero-padded to the width of the range's upper bound.  Nodes without a
    bracketed range pass through unchanged.

    Args:
        nodes: iterable of node-name strings
    Returns:
        list of expanded node names
    """
    data_array = []
    # raw strings fix the invalid escape sequences ('\[', '\.') that are
    # DeprecationWarnings (and future errors) in non-raw literals
    node_re = re.compile(r'(.*)\[(.*)\](\..*)')
    range_re = re.compile(r'.*\-.*')
    for node in nodes:
        match = node_re.match(node)
        if match:
            (prefix, nrange, suffix) = match.groups()
            length = 0
            if range_re.match(nrange):
                (r1, r2) = nrange.split('-')
                # pad to the width of the upper bound, e.g. 100-300 -> 3
                length = len(str(r2))
            for num in intspan(nrange):
                data_array.append("%s%s%s" %
                                  (prefix, str(num).zfill(length), suffix))
        else:
            data_array.append(node)

    return data_array
예제 #26
0
파일: groups.py 프로젝트: Murodese/pynab
def scan_missing_segments(group_name):
    """Scan for previously missed segments.

    Deletes misses that have exceeded the retry limit, rescans the server for
    the remaining missed message ids of this group, saves any parts retrieved,
    and re-records segments that are still missing.
    """

    log.info('missing: checking for missed segments')

    with db_session() as db:
        # recheck for anything to delete
        # (misses that have hit the configured retry limit are given up on)
        expired = db.query(Miss).filter(Miss.attempts >= config.scan.get('miss_retry_limit')).filter(
            Miss.group_name == group_name).delete()
        db.commit()
        if expired:
            log.info('missing: deleted {} expired misses'.format(expired))

        # get missing articles for this group
        missing_messages = [r for r, in db.query(Miss.message).filter(Miss.group_name == group_name).all()]

        if missing_messages:
            # mash it into ranges
            missing_ranges = intspan(missing_messages).ranges()

            server = Server()
            server.connect()

            status, parts, messages, missed = server.scan(group_name, message_ranges=missing_ranges)

            # if we got some missing parts, save them
            if parts:
                pynab.parts.save_all(parts)

            # even if they got blacklisted, delete the ones we got from the misses
            if messages:
                db.query(Miss).filter(Miss.message.in_(messages)).filter(Miss.group_name == group_name).delete(False)

            db.commit()

            if missed:
                # clear up those we didn't get
                save_missing_segments(group_name, missed)

            if server.connection:
                # best-effort disconnect; errors on quit are deliberately ignored
                try:
                    server.connection.quit()
                except:
                    pass
예제 #27
0
def derangify(nodes):
    """Expand node names containing a bracketed numeric range.

    E.g. 'vsccwn[100-300]-brn.vscc.vrsn.com' expands to one entry per number;
    numbers are zero-padded to the width of the range's upper bound.  Nodes
    without a bracketed range pass through unchanged.

    Args:
        nodes: iterable of node-name strings
    Returns:
        list of expanded node names
    """
    data_array = []
    # raw strings fix the invalid escape sequences ('\[', '\-') that are
    # DeprecationWarnings (and future errors) in non-raw literals
    node_re = re.compile(r'(.*)\[(.*)\](\-.*)')
    range_re = re.compile(r'.*\-.*')
    for node in nodes:
        match = node_re.match(node)
        if match:
            (prefix, nrange, suffix) = match.groups()
            length = 0
            if range_re.match(nrange):
                (r1, r2) = nrange.split('-')
                # pad to the width of the upper bound, e.g. 100-300 -> 3
                length = len(str(r2))
            for num in intspan(nrange):
                data_array.append("%s%s%s" %
                                  (prefix, str(num).zfill(length), suffix))
        else:
            data_array.append(node)

    return data_array
예제 #28
0
def update_years2str(year_str, years=None):
    """
    Given a string of years of the form ``"2014, 2016-2017"``, update the
    string if necessary to include the given years (default: the current year).

    >>> update_years2str('2015', [2015])
    '2015'
    >>> update_years2str('2015', [2016])
    '2015-2016'
    >>> update_years2str('2015', [2017])
    '2015, 2017'
    >>> update_years2str('2014-2015', [2016])
    '2014-2016'
    >>> update_years2str('2013, 2015', [2016])
    '2013, 2015-2016'
    >>> update_years2str('2013, 2015', [2017, 2014])
    '2013-2015, 2017'
    """
    target_years = [time.localtime().tm_year] if years is None else years
    span = intspan(year_str)
    span.update(target_years)
    return years2str(span)
예제 #29
0
def _intspan2sqllist(s):
    """Turn an ints' span (given as str) to a SQLite list of values."""
    joined = ', '.join(str(n) for n in intspan(s))
    return f'({joined})'
예제 #30
0
def test_intspansproduct_filter_packs():
    """Exercise IntspansProduct._filter_packs on hand-built pack lists."""
    # fully disjoint 2+2 packs: nothing survives filtering
    packs_list = \
        [[[intspan('1-2'), intspan('1-2')], [intspan('3-4'), intspan('5-6')]],
         [[intspan('1-2'), intspan('3-4')], [intspan('1-2'), intspan('5-6')]],
         [[intspan('1-2'), intspan('5-6')], [intspan('1-2'), intspan('3-4')]]]
    assert IntspansProduct._filter_packs(packs_list) == []
    # fully disjoint 3+1 packs: nothing survives filtering
    packs_list = \
        [[[intspan('1-2'), intspan('1-2'), intspan('3-4')], [intspan('5-6')]],
         [[intspan('1-2'), intspan('1-2'), intspan('5-6')], [intspan('3-4')]],
         [[intspan('1-2'), intspan('3-4'), intspan('5-6')], [intspan('1-2')]]]
    assert IntspansProduct._filter_packs(packs_list) == []
    # overlapping 2+2 packs: intersections '1' and '3' survive
    packs_list = \
        [[[intspan('1-2'), intspan('1,5')], [intspan('1,3'), intspan('3-4')]],
         [[intspan('1-2'), intspan('3-4')], [intspan('1,3'), intspan('1,5')]],
         [[intspan('1-2'), intspan('1,3')], [intspan('1,5'), intspan('3-4')]]]
    assert IntspansProduct._filter_packs(packs_list) == [[intspan('1'),
                                                          intspan('3')]]
    # overlapping 3+1 packs: '1' and '3-4' survive
    packs_list = \
        [[[intspan('1-2'), intspan('1,3'), intspan('1,5')], [intspan('3-4')]],
         [[intspan('1-2'), intspan('1,5'), intspan('3-4')], [intspan('1,3')]],
         [[intspan('1-2'), intspan('1,3'), intspan('3-4')], [intspan('1,5')]],
         [[intspan('1,3'), intspan('1,5'), intspan('3-4')], [intspan('1-2')]]]
    assert IntspansProduct._filter_packs(packs_list) == [[intspan('1'),
                                                          intspan('3-4')]]
    # larger nested-range 3+2 packs: several intersections survive
    packs_list = \
        [[[intspan('20-30'), intspan('20-40'), intspan('20-50')],
          [intspan('20-60'), intspan('20-90')]],
         [[intspan('20-30'), intspan('20-40'), intspan('20-60')],
          [intspan('20-50'), intspan('20-90')]],
         [[intspan('20-30'), intspan('20-40'), intspan('20-90')],
          [intspan('20-50'), intspan('20-60')]],
         [[intspan('20-30'), intspan('20-50'), intspan('20-60')],
          [intspan('20-40'), intspan('20-90')]],
         [[intspan('20-30'), intspan('20-50'), intspan('20-90')],
          [intspan('20-40'), intspan('20-60')]],
         [[intspan('20-30'), intspan('20-60'), intspan('20-90')],
          [intspan('20-40'), intspan('20-50')]],
         [[intspan('20-40'), intspan('20-50'), intspan('20-60')],
          [intspan('20-30'), intspan('20-90')]],
         [[intspan('20-40'), intspan('20-50'), intspan('20-90')],
          [intspan('20-30'), intspan('20-60')]],
         [[intspan('20-40'), intspan('20-60'), intspan('20-90')],
          [intspan('20-30'), intspan('20-50')]],
         [[intspan('20-50'), intspan('20-60'), intspan('20-90')],
          [intspan('20-30'), intspan('20-40')]]]
    assert IntspansProduct._filter_packs(packs_list) == [[intspan('20-30'),
                                                          intspan('20-60')],
                                                         [intspan('20-30'),
                                                          intspan('20-50')],
                                                         [intspan('20-30'),
                                                          intspan('20-40')],
                                                         [intspan('20-40'),
                                                          intspan('20-30')],
                                                         [intspan('20-50'),
                                                          intspan('20-30')],
                                                         ]
예제 #31
0
파일: util.py 프로젝트: jwodder/pyrepo
def years2str(years: List[int]) -> str:
    """Render years compactly, e.g. [2014, 2016, 2017] -> '2014, 2016-2017'."""
    compact = str(intspan(years))
    return compact.replace(",", ", ")
예제 #32
0
def groupVlan(vlanlist):
    """Group a vlan list into ranges, e.g. 2,3,4,7,8,10 -> 2-4,7-8,10."""
    return str(intspan(vlanlist))
예제 #33
0
def test_intspansproduct_rebuild_spans_from_packs():
    """Exercise IntspansProduct._rebuild_spans_from_packs with shape codes.

    The second argument (e.g. '3_2') encodes how many copies of each pack
    member appear in the rebuilt span lists.
    """
    # single pack, varying multiplicity codes
    filtered_packs = [[intspan('1'), intspan('3')]]
    assert IntspansProduct._rebuild_spans_from_packs(filtered_packs, '3_2') \
        == [[intspan('1'), intspan('1'), intspan('1'),
             intspan('3'), intspan('3')]]
    assert IntspansProduct._rebuild_spans_from_packs(filtered_packs, '2_2') \
        == [[intspan('1'), intspan('1'),
             intspan('3'), intspan('3')]]
    assert IntspansProduct._rebuild_spans_from_packs(filtered_packs, '1_1') \
        == [[intspan('1'), intspan('3')]]
    # two packs: each is rebuilt independently
    filtered_packs = [[intspan('1'), intspan('3')],
                      [intspan('4'), intspan('5-6')]]
    assert IntspansProduct._rebuild_spans_from_packs(filtered_packs, '3_1') \
        == [[intspan('1'), intspan('1'), intspan('1'), intspan('3')],
            [intspan('4'), intspan('4'), intspan('4'), intspan('5-6')]]
예제 #34
0
def call_event(align1,
               align2,
               query_seq=None,
               no_sort=False,
               max_inv_target_olap=30000,
               debug=False):
    """Curates adj based on info given by primary_aligns alignments.

    Determines the rearrangement type ('trl', 'inv', 'del', 'dup', 'ins')
    implied by 2 split alignments of the same contig, plus any untemplated
    (novel) or microhomology sequence at the breakpoint.

    Args:
        align1: First Alignment object
        align2: Second Alignment object
        query_seq: (str) Query (contig) sequence; needed for extracting
            novel/homology sequences
        no_sort: (bool) Keep breakpoints in query order instead of sorting
            them by target chromosome/coordinate
        max_inv_target_olap: (int) Maximum target overlap allowed between
            the 2 alignments when calling 'inv' on same-orientation aligns
        debug: (bool) Write a diagnostic line when no event can be called

    Returns:
        Adjacency object, or None when the event type cannot be determined
    """
    # figure out breakpoints using query positions
    target_breaks = [None, None]
    orients = [None, None]
    query_breaks = [None, None]
    homol_seq = None
    homol_seq_coords = None
    novel_seq = None
    novel_seq_coords = None

    # order the 2 alignments by their start position within the query;
    # breakpoint is the target coordinate facing the other alignment, and
    # orientation is 'L' when the break sits at the higher target coordinate
    if align1.qstart < align2.qstart:
        aligns = [align1, align2]
        target_breaks[0] = align1.tend if align1.strand == '+' else align1.tstart
        orients[0] = 'L' if max(align1.tstart,
                                align1.tend) == target_breaks[0] else 'R'
        target_breaks[1] = align2.tstart if align2.strand == '+' else align2.tend
        orients[1] = 'L' if max(align2.tstart,
                                align2.tend) == target_breaks[1] else 'R'
        query_breaks = [align1.qend, align2.qstart]
    else:
        aligns = [align2, align1]
        target_breaks[0] = align2.tend if align2.strand == '+' else align2.tstart
        orients[0] = 'L' if max(align2.tstart,
                                align2.tend) == target_breaks[0] else 'R'
        target_breaks[1] = align1.tstart if align1.strand == '+' else align1.tend
        orients[1] = 'L' if max(align1.tstart,
                                align1.tend) == target_breaks[1] else 'R'
        query_breaks = [align2.qend, align1.qstart]

    # canonical ordering: put breakpoints in chromosome/coordinate order
    if not no_sort:
        if (aligns[0].target != aligns[1].target and compare_chr(aligns[0].target, aligns[1].target) > 0) or\
           (aligns[0].target == aligns[1].target and target_breaks[0] > target_breaks[1]):
            aligns.reverse()
            target_breaks.reverse()
            orients.reverse()

    rearrangement = None
    if aligns[0].target != aligns[1].target:
        rearrangement = 'trl'
    elif orients[0] == orients[1]:
        # same orientation on the same chromosome: inversion, unless the 2
        # alignments overlap too much of the same target region
        span1 = intspan('%s-%s' % (aligns[0].tstart, aligns[0].tend))
        span2 = intspan('%s-%s' % (aligns[1].tstart, aligns[1].tend))
        olap = span1 & span2
        if len(olap) <= max_inv_target_olap:
            rearrangement = 'inv'
        else:
            # fixed: was a Python-2-only print statement (SyntaxError on Py3)
            print('%s:potential inv disallowed - target overlap %d bigger than %s' % (
                aligns[0].query, len(olap), max_inv_target_olap))
    # NOTE: the original also had 'L'/'L' and 'R'/'R' branches here; they are
    # unreachable because equal orientations are handled above
    elif orients[0] == 'L' and orients[1] == 'R':
        if target_breaks[0] < target_breaks[1]:
            if target_breaks[0] + 1 == target_breaks[1]:
                # breakpoints abut: deletion of tandem duplication vs insertion
                if query_breaks[0] >= query_breaks[1]:
                    rearrangement = 'del'
                    target_breaks = [
                        target_breaks[1] + 1, target_breaks[0] +
                        (query_breaks[0] - query_breaks[1] + 1)
                    ]
                else:
                    rearrangement = 'ins'
            else:
                # deletion with or without microhomology
                rearrangement = 'del'
        elif target_breaks[0] > target_breaks[1]:
            rearrangement = 'dup'
        else:
            if query_breaks[0] < query_breaks[1]:
                rearrangement = 'ins'
            else:
                # deletion of tandem duplication
                rearrangement = 'del'
                target_breaks = [
                    target_breaks[1] + 1,
                    target_breaks[0] + (query_breaks[0] - query_breaks[1] + 1)
                ]
    elif orients[0] == 'R' and orients[1] == 'L':
        if target_breaks[0] == target_breaks[1]:
            rearrangement = 'ins'
        elif target_breaks[0] < target_breaks[1]:
            rearrangement = 'dup'
        else:
            rearrangement = 'del'

    # untemplated (novel) sequence: gap between the 2 query breakpoints
    if query_seq is not None and query_breaks[1] - query_breaks[0] > 1:
        novel_seq = query_seq[query_breaks[0]:query_breaks[1] - 1]
        if aligns[0].strand == '-':
            novel_seq = reverse_complement(novel_seq)
            # NOTE(review): coords were only set on '-' strand in the
            # original; kept as-is — confirm whether '+' should set them too
            novel_seq_coords = (query_breaks[0] + 1, query_breaks[1] - 1)

    # microhomology sequence: the 2 query breakpoints overlap
    if query_seq is not None and query_breaks[0] >= query_breaks[1]:
        homol_seq_coords = [query_breaks[1], query_breaks[0]]
        homol_seq = query_seq[query_breaks[1] - 1:query_breaks[0]]
        if aligns[0].strand == '-':
            homol_seq = reverse_complement(homol_seq)
            homol_seq_coords = (query_breaks[1], query_breaks[0])

    adj = None
    if rearrangement is not None:
        adj = Adjacency(
            align1.query,
            (aligns[0].target, aligns[1].target),
            query_breaks,
            target_breaks,
            rearrangement=rearrangement,
            orients=orients,
            homol_seq=homol_seq,
            homol_seq_coords=homol_seq_coords,
            novel_seq=novel_seq,
            novel_seq_coords=novel_seq_coords,
        )
    elif debug:
        # fixed: message referenced undefined 'breaks' (NameError);
        # target_breaks is the value the message intends to show
        sys.stdout.write(
            "cannot figure out event of primary_aligns alignment contig:%s targets:%s,%s orients:%s breaks:%s query_breaks:%s\n"
            % (aligns[0].query, aligns[0].target, aligns[1].target, orients,
               target_breaks, query_breaks))
    return adj
예제 #35
0
def test_intspansproduct_group_by_packs():
    """Check _group_by_packs() against literal expected groupings.

    The expected values enumerate the distinct ways of splitting the spans
    into packs whose sizes are given by the dist_code (e.g. '2_2' = two
    packs of two, '3_1' = a pack of three plus a singleton).  A dist_code
    whose sizes do not sum to the number of spans raises ValueError.
    """
    r = IntspansProduct('1,2×1,2×3,4×5,6')
    assert r._group_by_packs(r.spans, '2_2') == \
        [[[intspan('1-2'), intspan('1-2')], [intspan('3-4'), intspan('5-6')]],
         [[intspan('1-2'), intspan('3-4')], [intspan('1-2'), intspan('5-6')]],
         [[intspan('1-2'), intspan('5-6')], [intspan('1-2'), intspan('3-4')]]]
    assert r._group_by_packs(r.spans, '3_1') == \
        [[[intspan('1-2'), intspan('1-2'), intspan('3-4')], [intspan('5-6')]],
         [[intspan('1-2'), intspan('1-2'), intspan('5-6')], [intspan('3-4')]],
         [[intspan('1-2'), intspan('3-4'), intspan('5-6')], [intspan('1-2')]]]
    # overlapping-but-distinct spans
    r = IntspansProduct('1,5×1,2×1,3×3,4')
    assert r._group_by_packs(r.spans, '2_2') == \
        [[[intspan('1-2'), intspan('1,5')], [intspan('1,3'), intspan('3-4')]],
         [[intspan('1-2'), intspan('3-4')], [intspan('1,3'), intspan('1,5')]],
         [[intspan('1-2'), intspan('1,3')], [intspan('1,5'), intspan('3-4')]]]
    assert r._group_by_packs(r.spans, '3_1') == \
        [[[intspan('1-2'), intspan('1,3'), intspan('1,5')], [intspan('3-4')]],
         [[intspan('1-2'), intspan('1,5'), intspan('3-4')], [intspan('1,3')]],
         [[intspan('1-2'), intspan('1,3'), intspan('3-4')], [intspan('1,5')]],
         [[intspan('1,3'), intspan('1,5'), intspan('3-4')], [intspan('1-2')]]]
    # degenerate codes: all singletons, and one pack holding everything
    assert r._group_by_packs(r.spans, '1_1_1_1') == \
        [[[intspan('1-2')], [intspan('1,3')], [intspan('1,5')],
          [intspan('3-4')]]]
    assert r._group_by_packs(r.spans, '4') == \
        [[[intspan('1-2'), intspan('1,3'), intspan('1,5'), intspan('3-4')]]]
    r = IntspansProduct('1×2,3×2,4')
    # dist_code sizes summing to more than the span count are rejected
    with pytest.raises(ValueError) as excinfo:
        r._group_by_packs(r.spans, '3_2_1')
    assert str(excinfo.value) == "dist_code '3_2_1' cannot be used for a "\
        'list of 3 intspans.'
    assert r._group_by_packs(r.spans, '2_1') == \
        [[[intspan('1'), intspan('2-3')], [intspan('2,4')]],
         [[intspan('1'), intspan('2,4')], [intspan('2-3')]],
         [[intspan('2-3'), intspan('2,4')], [intspan('1')]]]
    # larger case: C(5,3) = 10 groupings for '3_2'
    r = IntspansProduct('20-30×20-40×20-50×20-60×20-90')
    assert r._group_by_packs(r.spans, '3_2') == \
        [[[intspan('20-30'), intspan('20-40'), intspan('20-50')],
          [intspan('20-60'), intspan('20-90')]],
         [[intspan('20-30'), intspan('20-40'), intspan('20-60')],
          [intspan('20-50'), intspan('20-90')]],
         [[intspan('20-30'), intspan('20-40'), intspan('20-90')],
          [intspan('20-50'), intspan('20-60')]],
         [[intspan('20-30'), intspan('20-50'), intspan('20-60')],
          [intspan('20-40'), intspan('20-90')]],
         [[intspan('20-30'), intspan('20-50'), intspan('20-90')],
          [intspan('20-40'), intspan('20-60')]],
         [[intspan('20-30'), intspan('20-60'), intspan('20-90')],
          [intspan('20-40'), intspan('20-50')]],
         [[intspan('20-40'), intspan('20-50'), intspan('20-60')],
          [intspan('20-30'), intspan('20-90')]],
         [[intspan('20-40'), intspan('20-50'), intspan('20-90')],
          [intspan('20-30'), intspan('20-60')]],
         [[intspan('20-40'), intspan('20-60'), intspan('20-90')],
          [intspan('20-30'), intspan('20-50')]],
         [[intspan('20-50'), intspan('20-60'), intspan('20-90')],
          [intspan('20-30'), intspan('20-40')]]]
예제 #36
0
파일: chimera.py 프로젝트: bcgsc/pavfinder
def call_event(align1, align2, query_seq=None, no_sort=False, max_inv_target_olap=30000, debug=False):
    """Curates adj based on info given by primary_aligns alignments.

    Determines the rearrangement type ('trl', 'inv', 'del', 'dup', 'ins')
    implied by 2 split alignments of the same contig, plus any untemplated
    (novel) or microhomology sequence at the breakpoint.

    Args:
        align1: First Alignment object
        align2: Second Alignment object
        query_seq: (str) Query (contig) sequence; needed for extracting
            novel/homology sequences
        no_sort: (bool) Keep breakpoints in query order instead of sorting
            them by target chromosome/coordinate
        max_inv_target_olap: (int) Maximum target overlap allowed between
            the 2 alignments when calling 'inv' on same-orientation aligns
        debug: (bool) Write a diagnostic line when no event can be called

    Returns:
        Adjacency object, or None when the event type cannot be determined
    """
    # figure out breakpoints using query positions
    target_breaks = [None, None]
    orients = [None, None]
    query_breaks = [None, None]
    homol_seq = None
    homol_seq_coords = None
    novel_seq = None
    novel_seq_coords = None

    # order the 2 alignments by their start position within the query;
    # breakpoint is the target coordinate facing the other alignment, and
    # orientation is 'L' when the break sits at the higher target coordinate
    if align1.qstart < align2.qstart:
        aligns = [align1, align2]
        target_breaks[0] = align1.tend if align1.strand == '+' else align1.tstart
        orients[0] = 'L' if max(align1.tstart, align1.tend) == target_breaks[0] else 'R'
        target_breaks[1] = align2.tstart if align2.strand == '+' else align2.tend
        orients[1] = 'L' if max(align2.tstart, align2.tend) == target_breaks[1] else 'R'
        query_breaks = [align1.qend, align2.qstart]
    else:
        aligns = [align2, align1]
        target_breaks[0] = align2.tend if align2.strand == '+' else align2.tstart
        orients[0] = 'L' if max(align2.tstart, align2.tend) == target_breaks[0] else 'R'
        target_breaks[1] = align1.tstart if align1.strand == '+' else align1.tend
        orients[1] = 'L' if max(align1.tstart, align1.tend) == target_breaks[1] else 'R'
        query_breaks = [align2.qend, align1.qstart]

    # canonical ordering: put breakpoints in chromosome/coordinate order
    if not no_sort:
        if (aligns[0].target != aligns[1].target and compare_chr(aligns[0].target, aligns[1].target) > 0) or\
           (aligns[0].target == aligns[1].target and target_breaks[0] > target_breaks[1]):
            aligns.reverse()
            target_breaks.reverse()
            orients.reverse()

    rearrangement = None
    if aligns[0].target != aligns[1].target:
        rearrangement = 'trl'
    elif orients[0] == orients[1]:
        # same orientation on the same chromosome: inversion, unless the 2
        # alignments overlap too much of the same target region
        span1 = intspan('%s-%s' % (aligns[0].tstart, aligns[0].tend))
        span2 = intspan('%s-%s' % (aligns[1].tstart, aligns[1].tend))
        olap = span1 & span2
        if len(olap) <= max_inv_target_olap:
            rearrangement = 'inv'
        else:
            # fixed: was a Python-2-only print statement (SyntaxError on Py3)
            print('%s:potential inv disallowed - target overlap %d bigger than %s' % (
                aligns[0].query, len(olap), max_inv_target_olap))
    # NOTE: the original also had 'L'/'L' and 'R'/'R' branches here; they are
    # unreachable because equal orientations are handled above
    elif orients[0] == 'L' and orients[1] == 'R':
        if target_breaks[0] < target_breaks[1]:
            if target_breaks[0] + 1 == target_breaks[1]:
                # breakpoints abut: deletion of tandem duplication vs insertion
                if query_breaks[0] >= query_breaks[1]:
                    rearrangement = 'del'
                    target_breaks = [target_breaks[1] + 1,
                                     target_breaks[0] + (query_breaks[0] - query_breaks[1] + 1)]
                else:
                    rearrangement = 'ins'
            else:
                # deletion with or without microhomology
                rearrangement = 'del'
        elif target_breaks[0] > target_breaks[1]:
            rearrangement = 'dup'
        else:
            if query_breaks[0] < query_breaks[1]:
                rearrangement = 'ins'
            else:
                # deletion of tandem duplication
                rearrangement = 'del'
                target_breaks = [target_breaks[1] + 1,
                                 target_breaks[0] + (query_breaks[0] - query_breaks[1] + 1)]
    elif orients[0] == 'R' and orients[1] == 'L':
        if target_breaks[0] == target_breaks[1]:
            rearrangement = 'ins'
        elif target_breaks[0] < target_breaks[1]:
            rearrangement = 'dup'
        else:
            rearrangement = 'del'

    # untemplated (novel) sequence: gap between the 2 query breakpoints
    if query_seq is not None and query_breaks[1] - query_breaks[0] > 1:
        novel_seq = query_seq[query_breaks[0]:query_breaks[1] - 1]
        if aligns[0].strand == '-':
            novel_seq = reverse_complement(novel_seq)
            # NOTE(review): coords were only set on '-' strand in the
            # original; kept as-is — confirm whether '+' should set them too
            novel_seq_coords = (query_breaks[0] + 1, query_breaks[1] - 1)

    # microhomology sequence: the 2 query breakpoints overlap
    if query_seq is not None and query_breaks[0] >= query_breaks[1]:
        homol_seq_coords = [query_breaks[1], query_breaks[0]]
        homol_seq = query_seq[query_breaks[1] - 1:query_breaks[0]]
        if aligns[0].strand == '-':
            homol_seq = reverse_complement(homol_seq)
            homol_seq_coords = (query_breaks[1], query_breaks[0])

    adj = None
    if rearrangement is not None:
        adj = Adjacency(align1.query,
                        (aligns[0].target, aligns[1].target),
                        query_breaks,
                        target_breaks,
                        rearrangement=rearrangement,
                        orients=orients,
                        homol_seq=homol_seq,
                        homol_seq_coords=homol_seq_coords,
                        novel_seq=novel_seq,
                        novel_seq_coords=novel_seq_coords,
                        )
    elif debug:
        # fixed: message referenced undefined 'breaks' (NameError);
        # target_breaks is the value the message intends to show
        sys.stdout.write("cannot figure out event of primary_aligns alignment contig:%s targets:%s,%s orients:%s breaks:%s query_breaks:%s\n"
                         % (aligns[0].query,
                            aligns[0].target,
                            aligns[1].target,
                            orients,
                            target_breaks,
                            query_breaks))
    return adj
예제 #37
0
def inspect_project(dirpath=None):
    """Collect metadata about a Python project from its packaging files.

    Reads setup.cfg (plus tox.ini, .travis.yml, docs/, README.rst and
    LICENSE when present) under *dirpath* and returns a dict of template
    variables: project name/author, supported Python versions, console
    commands, GitHub/Travis/Codecov users, Read the Docs name, copyright
    years, etc.

    Args:
        dirpath: project root (a Path); defaults to the current directory.

    Returns:
        dict mapping template variable names to their values.

    Raises:
        ValueError: if setup.py or setup.cfg is missing, or no copyright
            line is found in LICENSE.
    """
    if dirpath is None:
        dirpath = Path()
    if not (dirpath / 'setup.py').exists():
        raise ValueError('No setup.py in project root')
    if not (dirpath / 'setup.cfg').exists():
        raise ValueError('No setup.cfg in project root')
    # core metadata straight out of setup.cfg
    cfg = read_configuration(str(dirpath / 'setup.cfg'))
    env = {
        "project_name": cfg["metadata"]["name"],
        "short_description": cfg["metadata"]["description"],
        "author": cfg["metadata"]["author"],
        "author_email": cfg["metadata"]["author_email"],
        "python_requires": cfg["options"]["python_requires"],
        "install_requires": cfg["options"].get("install_requires", []),
        # "version" appears in metadata only when it could be read from the
        # code, so its presence signals an importable project
        "importable": "version" in cfg["metadata"],
    }

    # package layout vs a flat single-module project
    if cfg["options"].get("packages"):
        env["is_flat_module"] = False
        env["import_name"] = cfg["options"]["packages"][0]
    else:
        env["is_flat_module"] = True
        env["import_name"] = cfg["options"]["py_modules"][0]

    # supported Python versions from the trove classifiers
    env["python_versions"] = []
    for clsfr in cfg["metadata"]["classifiers"]:
        m = re.fullmatch(r'Programming Language :: Python :: (\d+\.\d+)',
                         clsfr)
        if m:
            env["python_versions"].append(m.group(1))

    # console_scripts entry points -> {command name: callable spec}
    env["commands"] = {}
    try:
        commands = cfg["options"]["entry_points"]["console_scripts"]
    except KeyError:
        pass
    else:
        for cmd in commands:
            k, v = re.split(r'\s*=\s*', cmd, maxsplit=1)
            env["commands"][k] = v

    # GitHub user/repo from the project URL
    m = re.fullmatch(
        r'https://github.com/([^/]+)/([^/]+)',
        cfg["metadata"]["url"],
    )
    assert m, 'Project URL is not a GitHub URL'
    env["github_user"] = m.group(1)
    env["repo_name"] = m.group(2)

    # Read the Docs site name, falling back to the project name
    if "Documentation" in cfg["metadata"]["project_urls"]:
        m = re.fullmatch(
            r'https?://([-a-zA-Z0-9]+)\.(?:readthedocs|rtfd)\.io',
            cfg["metadata"]["project_urls"]["Documentation"],
        )
        assert m, 'Documentation URL is not a Read the Docs URL'
        env["rtfd_name"] = m.group(1)
    else:
        env["rtfd_name"] = env["project_name"]

    if "Say Thanks!" in cfg["metadata"]["project_urls"]:
        m = re.fullmatch(
            r'https://saythanks\.io/to/([^/]+)',
            cfg["metadata"]["project_urls"]["Say Thanks!"],
        )
        assert m, 'Invalid Say Thanks! URL'
        env["saythanks_to"] = m.group(1)
    else:
        env["saythanks_to"] = None

    # a "testenv" section in tox.ini signals the project has a test suite
    if (dirpath / 'tox.ini').exists():
        toxcfg = ConfigParser(interpolation=None)
        toxcfg.read(str(dirpath / 'tox.ini'))
        env["has_tests"] = toxcfg.has_section("testenv")
    else:
        env["has_tests"] = False

    env["has_travis"] = (dirpath / '.travis.yml').exists()
    env["has_docs"] = (dirpath / 'docs' / 'index.rst').exists()

    # README badges may override the Travis/Codecov account names;
    # default both to the GitHub user
    env["travis_user"] = env["codecov_user"] = env["github_user"]
    try:
        with (dirpath / 'README.rst').open(encoding='utf-8') as fp:
            rdme = Readme.parse(fp)
    except FileNotFoundError:
        env["has_pypi"] = False
    else:
        for badge in rdme.badges:
            m = re.fullmatch(
                r'https://travis-ci\.(?:com|org)/([^/]+)/[^/]+\.svg'
                r'(?:\?branch=.+)?', badge.href)
            if m:
                env["travis_user"] = m.group(1)
            m = re.fullmatch(
                r'https://codecov\.io/gh/([^/]+)/[^/]+/branch/.+'
                r'/graph/badge\.svg', badge.href)
            if m:
                env["codecov_user"] = m.group(1)
        env["has_pypi"] = any(link["label"] == "PyPI"
                              for link in rdme.header_links)

    # copyright years from LICENSE, expanded via intspan
    # (e.g. "2017-2019" -> [2017, 2018, 2019])
    with (dirpath / 'LICENSE').open(encoding='utf-8') as fp:
        for line in fp:
            m = re.match(r'^Copyright \(c\) (\d[-,\d\s]+\d) \w+', line)
            if m:
                env["copyright_years"] = list(intspan(m.group(1)))
                break
        else:
            raise ValueError('Copyright years not found in LICENSE')

    return env
예제 #38
0
            rdme = Readme.load(fp)
    except FileNotFoundError:
        env["has_pypi"] = False
    else:
        for badge in rdme.badges:
            if m := re.fullmatch(
                r"https://codecov\.io/gh/([^/]+)/[^/]+/branch/.+" r"/graph/badge\.svg",
                badge.href,
            ):
                env["codecov_user"] = m[1]
        env["has_pypi"] = any(link["label"] == "PyPI" for link in rdme.header_links)

    with (directory / "LICENSE").open(encoding="utf-8") as fp:
        for line in fp:
            if m := re.match(r"^Copyright \(c\) (\d[-,\d\s]+\d) \w+", line):
                env["copyright_years"] = list(intspan(m[1]))
                break
        else:
            raise InvalidProjectError("Copyright years not found in LICENSE")

    env["extra_testenvs"] = parse_extra_testenvs(
        directory / ".github" / "workflows" / "test.yml"
    )

    return env


class ModuleInfo(BaseModel):
    """Model describing how a project's Python code is laid out."""
    # Name under which the project's code is imported (package or module)
    import_name: str
    # True when the project is a single flat .py module rather than a package
    is_flat_module: bool
    # presumably True when the code lives under a src/ layout — TODO confirm
    src_layout: bool
예제 #39
0
def years2str(years):
    """Render a list of years as a compact intspan string with spaced commas.

    e.g. [2017, 2018, 2019, 2021] -> '2017-2019, 2021'
    """
    compact = str(intspan(years))
    return compact.replace(',', ', ')