Example #1
async def scrape_list(url, get):
    datasets = []
    while True:
        async with get(url) as list_resp:
            text = await list_resp.text()
        html = orig_html = parse_html(text)
        # '[Replication or Save Conflict]' warnings add an extra column,
        # complicating the parsing.  The 'Collapse' parameter gets rid of those
        # but it also messes up the pagination (because why wouldn't it),
        # so we're left with having to download the same page twice
        if '[Replication or Save Conflict]' in text:
            notice("'[Replication or Save Conflict]' in {}", url)
            async with get(url + '&Collapse=') as list_resp:
                html = parse_html(await list_resp.text())

        datasets.extend([
            (';'.join(filter(None,
                             (i.text_content().strip() for i in
                              r.xpath('.//*[starts-with(@class, "format-box")]'))
                             )) or None,
             r.xpath('string(.//*[@class = "datasetcat"])').strip(),
             r.xpath('string(.//a[@class = "datasethead"]/@href)'),
             url) for r in html.xpath('''\
//font[@class = "datasetresults"]
/following-sibling::table[1]/tr[position() > 1]''')])
        try:
            url, = orig_html.xpath('//a[contains(string(.), "Επόμενη")]/@href')
        except ValueError:
            return datasets
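A minimal sketch of how scrape_list might be driven, assuming aiohttp; the snippet itself leaves `get` and `parse_html` to the caller, and prepare_getter in Example #17 presumably plays a similar role:

import asyncio

import aiohttp

async def run(start_url):
    # scrape_list only needs get(url) to be an async context manager whose
    # value exposes a .text() coroutine; aiohttp's session.get fits that.
    async with aiohttp.ClientSession() as session:
        return await scrape_list(start_url, session.get)

# datasets = asyncio.get_event_loop().run_until_complete(run(START_URL))
# (START_URL is a placeholder for the listing page to scrape)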
Example #2
def save_restriction_sites(outpath, genome, cut_site):
    notice('called')
    rsites = all_matches_in_genome(cut_site, genome)
    with open(outpath, 'w') as f:
        for rsite in rsites:
            f.write('%s\t%s\t%s\n' % (rsite.chrom, rsite.start,
                                      rsite.end))
Example #3
        def wrapper(*args, **kwargs):
            start_time = time.time()
            request_id = self.generate_id()

            def inject_request_id(record):
                record.extra['request_id'] = request_id

            with logbook.Processor(inject_request_id):
                logbook.notice(self.request_str(), extra={"api": True})

                try:
                    response = callback(*args, **kwargs)
                except OperationalError as e:
                    logbook.warning("Database is down {}: {}", conf.database.uri, e, exc_info=True)
                    logbook.error("Database is down {}: {}", conf.database.uri, e)
                    response = errors.DatabaseIsDown()
                except errors.BadRequest as e:
                    e.format_response()
                    response = e
                except bottle.HTTPResponse as e:
                    response = e
                except Exception as e:
                    if self.under_test:
                        import traceback
                        traceback.print_exc()
                    logbook.exception("Exception during processing request: %s %s" %
                                      (bottle.request.method, bottle.request.path))
                    self.log_response(str(e), 500, time.time() - start_time)
                    raise
                finally:
                    from model import db
                    db.session.remove()
                response = self.response_process(response, time.time() - start_time)

            return response
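A standalone sketch of the logbook.Processor pattern used above: every record emitted inside the with-block passes through the callback, which can stamp extra fields onto it (here a hypothetical fixed request id):

import logbook

def inject(record):
    record.extra['request_id'] = 'req-42'  # hypothetical fixed id

with logbook.StderrHandler().applicationbound():
    with logbook.Processor(inject):
        # this record reaches the handler with extra['request_id'] set
        logbook.notice('handled', extra={'api': True})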
Example #4
def save_fragments(filepath, fragments):
    notice('called')
    with open(filepath, 'w') as f:
        for x in fragments:
            f.write(bedentry_as_string(x.left_rsite, name='rsite'))
            f.write(bedentry_as_string(x.left_primer))
            f.write(bedentry_as_string(x.right_rsite, name='rsite'))
            f.write(bedentry_as_string(x.right_primer))
Example #5
 def log_response(self, response_body, status, work_time):
     from io import BufferedReader
     if isinstance(work_time, float):
         work_time = "%.2f" % work_time
     if isinstance(response_body, BufferedReader):
         response_body = "<BufferedReader>"
     response_parameters = [work_time, str(status), response_body]
     log_response = self.request_str(short=True) + "|" + "|".join(response_parameters)
     logbook.notice(log_response, extra={"api": True})
Example #6
def work_dir_fixture(this):
    directory = tempfile.mkdtemp()

    def rmdir():
        shutil.rmtree(directory)

    logbook.notice('Working directory: {}', directory)
    this.add_cleanup(rmdir)

    return directory
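work_dir_fixture expects a `this` object exposing an add_cleanup hook; a minimal hypothetical stand-in, just enough to exercise the fixture:

class FixtureThis:
    """Collects cleanup callbacks and runs them in reverse order."""

    def __init__(self):
        self._cleanups = []

    def add_cleanup(self, fn):
        self._cleanups.append(fn)

    def finalize(self):
        while self._cleanups:
            self._cleanups.pop()()

this = FixtureThis()
workdir = work_dir_fixture(this)  # ... use workdir ...
this.finalize()                   # removes the temporary directory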
Example #7
 def log_response(self, response_body, status, work_time):
     from io import BufferedReader
     if isinstance(work_time, float):
         work_time = "%.2f" % work_time
     if isinstance(response_body, BufferedReader):
         response_body = "<BufferedReader>"
     response_parameters = [work_time, str(status), response_body]
     log_response = self.request_str(
         short=True) + "|" + "|".join(response_parameters)
     logbook.notice(log_response, extra={"api": True})
Example #8
def save_primer_sites(outpath, primers_fasta_path, genome):
    notice('called')
    primers = []

    for x in Bio.SeqIO.parse(primers_fasta_path, 'fasta'):
        ident = all_matches_in_genome(x.seq, genome)
        rcomp = all_matches_in_genome(x.seq.reverse_complement(), genome)
        primers.append((x.id, '+', ident))
        primers.append((x.id, '-', rcomp))
    with open(outpath, 'w') as f:
        for name, strand, hits in primers:
            for hit in hits:
                f.write('%s\t%s\t%s\t%s\t.\t%s\n' % (
                    hit.chrom, hit.start, hit.end, name, strand))
Example #9
def save_tags(filepath, fragments, genome=None,
              re_offset=0):
    def get_tag_intervals():
        for frag in fragments:
            # rev primer
            rname = frag.left_primer.name
            if rname.endswith('_fwd') or rname.endswith('_rev'):
                rname = rname[:-4]
            rname += '_rev'
            rev_tag = get_tag_interval(frag.left_primer,
                                       frag.left_rsite, name=rname,
                                       re_offset=re_offset)
            yield ('left_primer', rev_tag)
            # fwd primer
            fname = frag.right_primer.name
            if fname.endswith('_fwd') or fname.endswith('_rev'):
                fname = fname[:-4]
            fname += '_fwd'
            fwd_tag = get_tag_interval(frag.right_primer,
                                       frag.right_rsite, name=fname,
                                       re_offset=re_offset)
            yield ('right_primer', fwd_tag)

    notice('called')
    if genome is None:
        with open(filepath, 'w') as f:
            for prim_loc, x in get_tag_intervals():
                f.write(bedentry_as_string(x, extra=prim_loc))
        return
    z = collections.defaultdict(set)
    for prim_loc, x in get_tag_intervals():
        seq = genome[x.chrom][x.start:x.end]
        if prim_loc == 'left_primer':
            assert x.strand == '-'
            seq = seq.reverse_complement()
        else:
            assert x.strand == '+'
        seq = str(seq.seq)
        if seq in z[x.name]:
            warn('%s has multiple identical tag sequences.' % x.name)
        else:
            z[x.name].add(seq)
    with open(filepath, 'w') as f:
        for name in sorted(z):
            v = z[name]
            while len(v):
                f.write('>%s\n' % name)
                f.write('%s\n' % v.pop())
Example #10
def all_matches_in_genome(needle, genome):
    '''
    Arguments:
    `needle`: sequence we are looking for
    `genome`: dict of sequence per chrom (haystack)

    Returns: An iterable of (chrom, start, end) named tuples, one per match.
    '''
    BedEntry = collections.namedtuple('BedEntry', 'chrom start end')
    notice('called with needle "%s"' % needle)
    for chrom, v in genome.items():
        idx = 0
        while True:
            idx = v.seq.find(needle, idx)
            if idx == -1:
                break
            yield BedEntry(chrom, idx, idx + len(needle))
            idx += 1
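A toy usage sketch: all_matches_in_genome is a generator (and `idx += 1` allows overlapping matches), so wrap it in list() to collect every hit. SimpleNamespace stands in for the Biopython-style records with a .seq attribute that the code assumes, and notice() must be bound (e.g. to logbook.notice):

from types import SimpleNamespace

# a plain str provides the .find() the generator relies on
toy_genome = {'chr1': SimpleNamespace(seq='CCGGTTCCGG')}
hits = list(all_matches_in_genome('CCGG', toy_genome))
# -> [BedEntry(chrom='chr1', start=0, end=4),
#     BedEntry(chrom='chr1', start=6, end=10)]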
Example #11
def load_genome(path):
    '''
    Arguments:
    `path`: path to a fasta file, optionally gzip-compressed

    Returns: A dict of sequence per chromosome
    '''
    notice('called')
    if path.endswith('.gz'):
        f = gzip.open(path, 'rb')
    else:
        f = open(path, 'r')
    stream = Bio.SeqIO.parse(f, 'fasta')
    dna = {}
    for x in stream:
        dna[x.id] = x.upper()
    f.close()
    return dna
Example #12
def load_genome_dir(genome_dir):
    '''
    Arguments:
    `genome_dir`: directory containing zipped per chrom fasta files

    Returns: A dict of sequence per chromosome
    '''
    notice('called')
    names = os.listdir(genome_dir)
    dna = {}
    for x in names:
        assert x.endswith('.gz')
        name = x.split('.')[0]
        info('loading %s' % name)
        with gzip.open(os.path.join(genome_dir, x)) as f:
            stream = Bio.SeqIO.parse(f, 'fasta')
            dna[name] = next(stream).upper()
    return dna
Example #13
        def wrapper(*args, **kwargs):
            start_time = time.time()
            request_id = self.generate_id()

            def inject_request_id(record):
                record.extra['request_id'] = request_id

            with logbook.Processor(inject_request_id):
                logbook.notice(self.request_str(), extra={"api": True})

                try:
                    response = callback(*args, **kwargs)
                except OperationalError as e:
                    logbook.warning("Database is down {}: {}",
                                    conf.database.uri,
                                    e,
                                    exc_info=True)
                    logbook.error("Database is down {}: {}", conf.database.uri,
                                  e)
                    response = errors.DatabaseIsDown()
                except errors.BadRequest as e:
                    e.format_response()
                    response = e
                except bottle.HTTPResponse as e:
                    response = e
                except Exception as e:
                    if self.under_test:
                        import traceback
                        traceback.print_exc()
                    logbook.exception(
                        "Exception during processing request: %s %s" %
                        (bottle.request.method, bottle.request.path))
                    self.log_response(str(e), 500, time.time() - start_time)
                    raise
                finally:
                    from model import db
                    db.session.remove()
                response = self.response_process(response,
                                                 time.time() - start_time)

            return response
Example #14
 def test_global_functions(self):
     handler = logbook.TestHandler()
     with handler:
         logbook.debug('a debug message')
         logbook.info('an info message')
         logbook.warn('warning part 1')
         logbook.warning('warning part 2')
         logbook.notice('notice')
         logbook.error('an error')
         logbook.critical('pretty critical')
         logbook.log(logbook.CRITICAL, 'critical too')
     self.assert_(handler.has_debug('a debug message'))
     self.assert_(handler.has_info('an info message'))
     self.assert_(handler.has_warning('warning part 1'))
     self.assert_(handler.has_warning('warning part 2'))
     self.assert_(handler.has_notice('notice'))
     self.assert_(handler.has_error('an error'))
     self.assert_(handler.has_critical('pretty critical'))
     self.assert_(handler.has_critical('critical too'))
     self.assertEqual(handler.records[0].channel, 'Generic')
     self.assertEqual(handler.records[0].dispatcher, None)
Example #15
 def test_global_functions(self):
     handler = logbook.TestHandler()
     with handler:
         logbook.debug('a debug message')
         logbook.info('an info message')
         logbook.warn('warning part 1')
         logbook.warning('warning part 2')
         logbook.notice('notice')
         logbook.error('an error')
         logbook.critical('pretty critical')
         logbook.log(logbook.CRITICAL, 'critical too')
     self.assert_(handler.has_debug('a debug message'))
     self.assert_(handler.has_info('an info message'))
     self.assert_(handler.has_warning('warning part 1'))
     self.assert_(handler.has_warning('warning part 2'))
     self.assert_(handler.has_notice('notice'))
     self.assert_(handler.has_error('an error'))
     self.assert_(handler.has_critical('pretty critical'))
     self.assert_(handler.has_critical('critical too'))
     self.assertEqual(handler.records[0].logger_name, 'generic')
     self.assertEqual(handler.records[0].channel, None)
Example #16
def test_global_functions(activation_strategy):
    with activation_strategy(logbook.TestHandler()) as handler:
        logbook.debug('a debug message')
        logbook.info('an info message')
        logbook.warn('warning part 1')
        logbook.warning('warning part 2')
        logbook.notice('notice')
        logbook.error('an error')
        logbook.critical('pretty critical')
        logbook.log(logbook.CRITICAL, 'critical too')

    assert handler.has_debug('a debug message')
    assert handler.has_info('an info message')
    assert handler.has_warning('warning part 1')
    assert handler.has_warning('warning part 2')
    assert handler.has_notice('notice')
    assert handler.has_error('an error')
    assert handler.has_critical('pretty critical')
    assert handler.has_critical('critical too')
    assert handler.records[0].channel == 'Generic'
    assert handler.records[0].dispatcher is None
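Example #16 parametrizes over an activation_strategy; one hypothetical strategy that satisfies it is a context manager that binds the handler for the duration of the block and yields it (TestHandler keeps its records afterwards, so the asserts still pass):

from contextlib import contextmanager

@contextmanager
def context_enter_strategy(handler):
    # bind the handler application-wide for the block's duration
    with handler.applicationbound():
        yield handler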
Example #17
def main():
    with StderrLogger(), \
            aiohttp.ClientSession(loop=loop) as session, \
            sqlite3.connect('data.sqlite') as conn:
        reported_total, datasets = loop\
            .run_until_complete(gather_datasets(prepare_getter(loop, session)))
        now = datetime.now().isoformat()
        conn.execute('''\
CREATE TABLE IF NOT EXISTS data
(identifier UNIQUE, title, url, formats, tag, source, fee, processing_level,
 release_date, license, update_frequency, reporting_period,
 geographic_coverage, 'contact_point/name', 'contact_point/email',
 meta__list_url, meta__last_updated)''')
        insert_total = conn.executemany('''\
INSERT OR REPLACE INTO data
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)''',
            ((*(i for _, i in sorted(d.items(),
                                     key=lambda i: fields.index(i[0]))), now)
             for d in datasets)).rowcount
        notice('Inserted {} datasets; {} are reported to exist',
               insert_total, reported_total)
Example #18
 def test_global_functions(self):
     handler = logbook.TestHandler()
     handler.push_thread()
     try:
         logbook.debug('a debug message')
         logbook.info('an info message')
         logbook.warn('warning part 1')
         logbook.warning('warning part 2')
         logbook.notice('notice')
         logbook.error('an error')
         logbook.critical('pretty critical')
         logbook.log(logbook.CRITICAL, 'critical too')
     finally:
         handler.pop_thread()
     self.assert_(handler.has_debug('a debug message'))
     self.assert_(handler.has_info('an info message'))
     self.assert_(handler.has_warning('warning part 1'))
     self.assert_(handler.has_warning('warning part 2'))
     self.assert_(handler.has_notice('notice'))
     self.assert_(handler.has_error('an error'))
     self.assert_(handler.has_critical('pretty critical'))
     self.assert_(handler.has_critical('critical too'))
     self.assertEqual(handler.records[0].channel, 'Generic')
     self.assertEqual(handler.records[0].dispatcher, None)
Example #19
def main(out_dir, genome, primers_filepath, re_site,
         max_dist_rsite_primer,
         re_offset=0):
    primer_sites_path = os.path.join(out_dir, 'primers.bed')
    re_cut_sites_path = os.path.join(out_dir, 're_cut_sites.bed')
    tags_bed_path = os.path.join(out_dir, 'tags.bed')
    tags_raw_bed_path = os.path.join(out_dir, 'tags_raw.bed')
    tags_fa_path = os.path.join(out_dir, 'tags.fa')
    if not os.path.isfile(primer_sites_path):
        save_primer_sites(primer_sites_path, primers_filepath, genome)
    else:
        notice('%s exists. using cached version' % primer_sites_path)
    if not os.path.isfile(re_cut_sites_path):
        save_restriction_sites(re_cut_sites_path, genome, re_site)
    else:
        notice('%s exists. using cached version' % re_cut_sites_path)

    fragments = list(find_primer_pairs_bordering_fragment(
        re_cut_sites_path, primer_sites_path, max_dist_rsite_primer))
    save_fragments(tags_raw_bed_path, fragments)
    save_tags(tags_bed_path, fragments,
              re_offset=re_offset)
    save_tags(tags_fa_path, fragments, genome=genome,
              re_offset=re_offset)
Example #20
def create_settings_file():
    filename = 'photobackup_settings.py'
    global input

    # Python2 compatibility for input()
    try:
        input = raw_input
    except NameError:
        pass

    # ask for the upload directory (should be writable by the server)
    media_root = input("The directory where to put the pictures" +
                       " (should be writable by the server you use): ")
    if not os.path.isdir(media_root):
        notice("Directory {} does not exist, creating it".format(media_root))
        os.mkdir(media_root)
    server_user = input("Owner of the directory [www-data]: ")
    if not server_user:
        server_user = 'www-data'

    try:
        server_user_uid = pwd.getpwnam(server_user).pw_uid
        if os.stat(media_root).st_uid != server_user_uid:
            notice("Changing owner to: ".format(server_user))
            try:
                shutil.chown(media_root, server_user, server_user)
            except AttributeError:
                warn("Can't change directory's owner, please do it correctly!")
    except KeyError:
        warn("User {} not found, please check the directory's rights."
             .format(server_user))

    # ask a password for the server
    text = "The server password that you use in the mobile app: "
    password = getpass.getpass(prompt=text)
    passhash = hashlib.sha512(password.encode('utf-8')).hexdigest()

    with open(filename, 'w') as settings:
        settings.write("# generated settings for PhotoBackup Bottle server\n")
        settings.write("MEDIA_ROOT = '{}'\n".format(media_root))
        settings.write("PASSWORD = '******'\n".format(passhash))

    notice("Settings file is created, please launch me again!")
    return media_root, passhash

MEDIA_ROOT, PASSWORD = None, None

# import user-created settings for this specific server
try:
    from photobackup_settings import MEDIA_ROOT, PASSWORD
    if os.path.isdir(MEDIA_ROOT) and os.path.exists(MEDIA_ROOT):
        notice("pictures directory is " + MEDIA_ROOT)
    else:
        sys.exit("pictures directory " + MEDIA_ROOT + " does not exist!")
except ImportError:
    warn("Can't find photobackup_settings.py file, creating it")
    MEDIA_ROOT, PASSWORD = create_settings_file()
Example #21
# Import mensafeed
if not "MENSAFEED_CONFIG" in environ:
    environ["MENSAFEED_CONFIG"] = path.abspath(path.join(path.dirname(__file__), "mensafeed.cfg"))
from mensafeed import app
from mensafeed.maachen import MensaAachenParser, urls

if __name__ == "__main__":
    start_time = time()

    log_dir = path.join(path.dirname(__file__), 'log')
    if not path.exists(log_dir):
        makedirs(log_dir)

    log_handler = FileHandler(path.join(log_dir, 'fetch.log'), bubble=True)
    log_handler.push_application()

    parser = MensaAachenParser(urls)

    # Save fetched data to couchdb
    s = couchdb.Server(app.config["DATABASE_SERVER"])
    db = s["mensafeed"]

    data = {
        "canteens": parser.fetch(),
        "time_fetched": int(time())
    }

    db.save(data)

    notice("Fetched and stored data in %.2fs" % (time() - start_time))
Example #22
def find_primer_pairs_bordering_fragment(rsites_path, primer_path,
                                         max_dist):
    '''
    Finds pairs of primers flanking the same fragment.
    A fragment is the interval between two adjacent restriction enzyme
    cut sites.
    The leftmost primer site must be on the reverse strand
    and the rightmost primer site must be on the forward strand.

    Arguments:
    `rsites_path`: bed file path to restriction cut sites
    `primer_path`: bed file path to primer matches in the genome
    `max_dist`: the maximum distance between a primer and a cut site.

    Returns:
    An iterable of named tuples containing the flanking rsites and
    the two primers.
    (left_rsite left_primer right_primer right_rsite)
    '''
    notice('called')
    struct = collections.namedtuple(
        'PrimerFragment',
        'left_rsite left_primer right_primer right_rsite')
    mspi_sites = Lookup(BedTool(rsites_path))
    primers = collections.defaultdict(list)

    for p in BedTool(primer_path):
        basename = p.name.split('_')[0]
        primers[basename].append(p)

    primers = {k: sorted(v, key=lambda x: (x.chrom, x.start))
               for k, v in primers.items()}

    tot_frag = 0
    for basename, zs in primers.items():
        info('computing for %s' % basename)
        nfrag = 0
        for l, r in zip(zs[:-1], zs[1:]):
            # RNA POL II moves 3' -> 5' along the template strand
            if (not l.chrom == r.chrom or
                l.name == r.name or
                l.strand == '+' or
                r.strand == '-' or
                l.start - r.end > 10**4):
                continue
            sites = mspi_sites.get_in_interval(l.chrom,
                                               l.start - max_dist,
                                               r.end + max_dist)
            if len(sites) < 2: continue
            starts = [x.start for x in sites]
            lidx = bisect.bisect(starts, l.start)
            ridx = bisect.bisect(starts, r.start)
            if not lidx == ridx: continue
            lsite = sites[lidx-1]
            rsite = sites[ridx]
            if (lsite.start <= l.start and
                r.end <= rsite.end):
                nfrag += 1
                yield struct(lsite, l, r, rsite)
        notice('Stored %d fragments for %s' % (nfrag, basename))
        tot_frag += nfrag
    notice('Stored %d fragments in total' % tot_frag)
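A toy check of the flanking test above: with cut-site starts sorted, a primer pair flanks a single fragment exactly when bisect assigns both primer starts the same insertion index, i.e. no cut site begins between them:

import bisect

starts = [100, 500, 900]       # hypothetical sorted cut-site starts
l_start, r_start = 520, 730    # primer pair inside the 500..900 fragment
assert bisect.bisect(starts, l_start) == bisect.bisect(starts, r_start) == 2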
Example #23
    parser.add_argument('-m', '--max-distance-rsite-primer',
                        type=int, default=100)
    parser.add_argument('--re-offset', type=int, default=0)
    args = parser.parse_args(sys.argv[1:])

    if not os.path.isdir(args.output_dir):
        os.makedirs(args.output_dir)
    logfile_name = os.path.join(args.output_dir, 'log.txt')
    fmt = '{record.level_name} {record.func_name}: {record.message}'
    filehandler = logbook.FileHandler(logfile_name, mode='w',
                                      format_string=fmt)
    stderrhandler = logbook.StderrHandler(bubble=True, format_string=fmt)

    with filehandler.applicationbound():
        with stderrhandler.applicationbound():
            notice('Script arguments were:')
            notice(' '.join(sys.argv))
            primer_path = os.path.join(args.output_dir, 'primers.fa')
            notice('restriction enzyme is %s. ' % args.re_cut_site)
            shutil.copy(args.primers_fastq, primer_path)
            notice('Copied primers to %s' % primer_path)
            if os.path.isdir(args.genome):
                genome = load_genome_dir(args.genome)
            elif os.path.isfile(args.genome):
                genome = load_genome(args.genome)
            else:
                raise Exception("genome path must be a file or a directory: %s" % args.genome)

            find_tags(args.output_dir, genome,
                      primer_path, args.re_cut_site,
                      args.max_distance_rsite_primer,
                      re_offset=args.re_offset)