Beispiel #1
0
    def run(self):
        """Refresh and activate the primers built from ``self.input``.

        Chains ``update_melt_temps()``, ``update_locations(self.fg_genome_fp)``
        and ``activate()`` on a ``Primers`` object created from ``self.input``.

        Raises:
            AttributeError: re-raised unchanged after a warning has been
                emitted.
        """
        primers = Primers(self.input)

        try:
            (primers.update_melt_temps().update_locations(
                self.fg_genome_fp).activate())
        except AttributeError as e:
            # Format the exception itself rather than `e.message`: that
            # attribute was deprecated in Python 2.6 and does not exist on
            # Python 3, where reading it would raise a second AttributeError
            # and hide the real failure.
            warn("Error updating database: '{}'".format(e))
            # A bare `raise` re-raises with the original traceback intact.
            raise
Beispiel #2
0
    def run(self):
        """Update and activate the primers selected by ``self.input``.

        A ``Primers`` object is created from ``self.input``; its melting
        temperatures and its locations (against ``self.fg_genome_fp``) are
        updated and the primers are then activated, in that order.

        Raises:
            AttributeError: propagated to the caller after being reported
                through ``warn``.
        """
        primers = Primers(self.input)

        try:
            (primers
                .update_melt_temps()
                .update_locations(self.fg_genome_fp)
                .activate())
        except AttributeError as e:
            # `e.message` is Python-2-only (deprecated since 2.6); on
            # Python 3 it raises a new AttributeError that masks this one.
            # Formatting the exception object works on both.
            warn("Error updating database: '{}'".format(e))
            # Bare `raise` keeps the traceback of the original failure.
            raise
Beispiel #3
0
 def parse_args(self, argv, quiet=False):
     """Parse ``argv`` and mirror every parsed option onto this object.

     Recognised options are stored as a dict in ``self.args`` and are also
     set as individual attributes; unrecognised arguments are kept in
     ``self.unknown``. Unless ``quiet`` is true, ``self._pprint_args()`` is
     called afterwards. A warning is emitted when there is at least one
     option and every option value is ``None``.

     :param argv: argument list handed to ``self.parser.parse_known_args``.
     :param quiet: when true, skip the call to ``self._pprint_args()``.
     """
     namespace, leftovers = self.parser.parse_known_args(argv)
     self.args = vars(namespace)
     self.unknown = leftovers
     for option, value in self.args.items():
         setattr(self, option, value)

     if not quiet:
         self._pprint_args()

     nothing_set = len(self.args) > 0 and all(
         value is None for value in self.args.values())
     if nothing_set:
         template = u'[swga {0:s}]: All parameters are missing- this may indicate a corrupt or missing parameters file.'
         warn(template.format(self.name))
Beispiel #4
0
 def parse_args(self, argv, quiet=False):
     """Parse ``argv``, store the result and forward it as keyword args.

     The parsed options are stored as a dict in ``self.args`` and passed to
     ``self.kwargs_as_args(**self.args)``; unrecognised arguments are kept
     in ``self.unknown``. Unless ``quiet`` is true, ``self.pprint_args()``
     is called. A warning is emitted when there is at least one option and
     every option value is ``None``.

     :param argv: argument list handed to ``self.parser.parse_known_args``.
     :param quiet: when true, skip the call to ``self.pprint_args()``.
     """
     args, unknown = self.parser.parse_known_args(argv)
     self.unknown = unknown
     self.args = vars(args)
     self.kwargs_as_args(**self.args)
     if not quiet:
         self.pprint_args()
     if len(self.args) > 0 and all(v is None for v in self.args.values()):
         # The two literals must be adjacent (no comma) so they form ONE
         # format string. With a comma, `%` was applied to the second
         # literal, which has no placeholder, and raised TypeError instead
         # of warning.
         swga.warn(
             "[swga %s]: All parameters are missing- "
             "this may indicate a corrupt or missing parameters file."
             % self.name)
Beispiel #5
0
    def process_lines(self, setfinder_lines):
        """Score the sets read from ``setfinder_lines`` and save passing ones.

        Each line is parsed into primer ids and a background distance mean,
        the matching primers are scored with ``score.score_set``, and a
        status line is reported through ``message``. A set whose score is not
        ``False`` is stored with ``Set.add``. Lines that cannot be parsed are
        warned about and skipped. Processing stops once ``self.max_sets``
        sets have passed.

        :param setfinder_lines: iterable of set-finder output lines; its
            ``close()`` method is always called before returning.
        """
        n_passed = 0
        n_processed = 0
        best_max_dist = float('inf')

        try:
            for raw_line in setfinder_lines:
                try:
                    primer_ids, bg_dist_mean = score.read_set_finder_line(
                        raw_line)
                except ValueError:
                    warn("Could not parse line:\n\t" + raw_line)
                    continue

                primers = Primers.select_by_ids(primer_ids)
                n_processed += 1

                set_score, variables, max_dist = score.score_set(
                    primers=primers,
                    max_fg_bind_dist=self.max_fg_bind_dist,
                    bg_dist_mean=bg_dist_mean,
                    chr_ends=self.chr_ends,
                    score_fun=self.score_fun,
                    interactive=False,
                )

                # Track the smallest maximum distance seen so far.
                best_max_dist = min(best_max_dist, max_dist)

                status = STATUS_LINE.format(
                    n_processed, n_passed, best_max_dist)
                message(status, newline=False)

                # A score of exactly False means the set was rejected.
                if set_score is False:
                    continue

                n_passed += 1
                Set.add(
                    _id=n_passed,
                    primers=primers,
                    score=set_score,
                    scoring_fn=self.score_expression,
                    **variables)

                if n_passed >= self.max_sets:
                    message("\nDone (scored %i sets)" % n_passed)
                    break
        finally:
            # Raises a GeneratorExit inside the find_sets command, prompting it
            # to quit the subprocess
            setfinder_lines.close()
Beispiel #6
0
    def process_lines(self, setfinder_lines):
        """Consume set-finder output, scoring each set and storing passes.

        For every parseable line the primers are looked up by id and scored
        via ``score.score_set``; progress is reported through ``message``.
        Sets whose score is not ``False`` are numbered in passing order and
        saved with ``Set.add``. Unparseable lines produce a warning and are
        skipped. The loop ends when the input is exhausted or when
        ``self.max_sets`` sets have passed.

        :param setfinder_lines: iterable of set-finder output lines; it is
            closed with ``close()`` on every exit path.
        """
        accepted = 0
        seen = 0
        min_max_dist = float('inf')

        try:
            for text in setfinder_lines:
                try:
                    primer_ids, bg_dist_mean = score.read_set_finder_line(text)
                except ValueError:
                    warn("Could not parse line:\n\t" + text)
                    continue

                primers = Primers.select_by_ids(primer_ids)
                seen += 1

                set_score, variables, max_dist = score.score_set(
                    primers=primers,
                    max_fg_bind_dist=self.max_fg_bind_dist,
                    bg_dist_mean=bg_dist_mean,
                    chr_ends=self.chr_ends,
                    score_fun=self.score_fun,
                    interactive=False)

                # Remember the smallest maximum distance encountered.
                min_max_dist = (
                    max_dist if max_dist < min_max_dist else min_max_dist)

                message(
                    STATUS_LINE.format(seen, accepted, min_max_dist),
                    newline=False)

                # Only sets with a score other than False are stored.
                if set_score is not False:
                    accepted += 1
                    Set.add(
                        _id=accepted,
                        primers=primers,
                        score=set_score,
                        scoring_fn=self.score_expression,
                        **variables)

                    if accepted >= self.max_sets:
                        message("\nDone (scored %i sets)" % accepted)
                        break
        finally:
            # Raises a GeneratorExit inside the find_sets command, prompting it
            # to quit the subprocess
            setfinder_lines.close()
Beispiel #7
0
def calculate_bg_dist_mean(primers, bg_length):
    """Calculate the mean distance between binding sites on the bg genome.

    :param primers: iterable of objects exposing a numeric ``bg_freq``
        attribute. It is traversed exactly once, so one-shot iterators and
        generators are supported.
    :param bg_length: the total length of the background genome.
    :returns: ``bg_length`` divided by the summed ``bg_freq`` as a float, or
        infinity (after a warning) when the summed frequency is zero.
    """
    total_bg_freq = sum(p.bg_freq for p in primers)
    if total_bg_freq == 0:
        warn(
            "No primers appear in the background genome: "
            "bg_dist_mean set as infinite")
        return float('Inf')
    # Reuse the total computed above: summing `primers` a second time would
    # yield 0 for an already-consumed iterator and divide by zero.
    return float(bg_length) / total_bg_freq
Beispiel #8
0
def read_primer_list(lines):
    """Read in a list of primers and return their records from the db.

    A warning is emitted for every requested sequence that has no matching
    record; those sequences are simply absent from the result.

    :param lines: a list of primer sequences, one per line; anything after the
    first whitespace is ignored.
    :returns: a list of the ``Primer`` records whose ``seq`` is among the
    requested sequences.
    """
    # Strip '\r' as well as '\n' so CRLF-terminated input does not leave a
    # carriage return glued to the sequence (which would never match).
    seqs = [re.split(r'[ \t]+', line.strip('\r\n'))[0] for line in lines]
    primers = list(Primer.select().where(Primer.seq << seqs).execute())
    if len(primers) < len(seqs):
        # Set membership keeps the missing-sequence scan linear.
        found_seqs = {p.seq for p in primers}
        for seq in seqs:
            if seq not in found_seqs:
                warn(
                    seq + ' not in the database; skipping. Add it manually '
                    'with `swga count --input <file>` ')
    return primers
Beispiel #9
0
def init_db(db_fname, create_if_missing=False):
    '''
    Initializes the module-level `db` with the given file path and returns it.

    An error is reported through `swga.error` when `db_fname` is `None`, or
    when it is not an existing file and `create_if_missing` is false (the
    latter is expected to exit with SystemExit). The special name ":memory:"
    only triggers a warning.
    '''
    if db_fname is None:
        swga.error("Primer db name cannot be `None`: corrupt preferences.cfg?")
    elif db_fname == ":memory:":
        swga.warn("Creating in-memory primer database; this may not work.")
    elif not (os.path.isfile(db_fname) or create_if_missing):
        not_found = (
            "Primer db not found at '%s': specify different filename or "
            "re-run `swga count`" % db_fname)
        # Exits here
        swga.error(not_found, exception=False)

    db.init(db_fname)
    return db
Beispiel #10
0
    def _hits_per_record(self):
        '''
        Slides a window over each record of the foreground genome and yields
        one `(record_name, midpoint, hits)` tuple per window, where
        `midpoint` is the midpoint of the window and `hits` is the sum, over
        the bases in the window, of the number of primers in `self.set`
        covering each base.

        The window size is clamped to the record length and the step size to
        the window size; a warning is emitted whenever either is adjusted.
        '''
        record_ends = swga.locate.chromosome_ends(self.fg_genome_fp)
        for record_name, ends in record_ends.iteritems():
            record_length = ends[1] + 1

            # Check window size <= record_length and fix if not
            this_window_size = self.window_size
            if this_window_size > record_length:
                this_window_size = record_length
                swga.warn(
                    "In [{}]: window size larger than record; set to {}"
                    .format(record_name, this_window_size))

            # Check step size is compatible with window size and fix if not
            this_step_size = self.step_size
            if this_step_size > this_window_size:
                this_step_size = this_window_size
                swga.warn(
                    "In [{}]: step size larger than window size ({}), set to {}"
                    .format(record_name, this_window_size, this_step_size))

            # Count the number of primers that bind to any given nucleotide
            # in the current record
            counter = Counter()
            for primer in self.set.primers:
                k = len(primer.seq)
                locations = primer.locations()
                for loc in locations[record_name]:
                    # Counter.update counts the elements of any iterable, so
                    # no temporary Counter is needed.
                    counter.update(xrange(loc, loc + k))

            # The last valid start is `record_length - window`, so the stop
            # bound needs +1. Without it the final window was dropped, and a
            # window clamped to the record length produced no output at all.
            starting_positions = xrange(
                0, int(record_length - this_window_size) + 1, this_step_size)

            for start in starting_positions:
                end = start + this_window_size
                # Floor division: an integer midpoint regardless of the
                # interpreter's `/` semantics.
                midpoint = (end + start) // 2
                # Add each base's count to get the number of bases covered
                hits = sum(counter[i] for i in xrange(start, end))
                yield record_name, midpoint, hits
Beispiel #11
0
def check_create_tables(primer_db):
    """Create the database tables, asking for confirmation first if needed.

    When `primer_db` is an existing file, two warnings are emitted and the
    user is prompted through `click.confirm(..., abort=True)` before
    `database.create_tables()` is called.

    :param primer_db: path of the primer database file.
    """
    if os.path.isfile(primer_db):
        db_path = os.path.abspath(primer_db)
        notices = (
            "Existing database found at %s" % db_path,
            "This will reset the entire database!",
        )
        for notice in notices:
            swga.warn(notice)
        click.confirm("Are you sure you want to proceed?", abort=True)

    database.create_tables()