# Stdlib imports used by the code in this extract; project modules (dates,
# constants, models, Provenance, Suggestion, ADMIN_STATUSES, etc.) are assumed
# to be provided by the surrounding codebase.
import codecs
import os
import time
import uuid
from copy import deepcopy
from datetime import datetime, timedelta
from random import randint


def make_status_spread(cls, desired_output, period, role_map):
    # classmethod on a fixture factory; decorator and class context are elided
    # in this extract
    desired_output = deepcopy(desired_output)

    # strip the header row and its leading corner cell, leaving one period
    # label per column
    header = desired_output[0]
    del desired_output[0]
    del header[0]

    # turn each period label into a (start, end) date range
    ranges = []
    for h in header:
        start = None
        end = None
        if period == "month":
            startts = dates.parse(h, "%Y-%m")
            # compute the first day of the following month, carrying the year
            # when the month wraps
            year, month = divmod(startts.month + 1, 12)
            if month == 0:
                month = 12
                year = year - 1
            endts = datetime(startts.year + year, month, 1)
            start = dates.format(startts)
            end = dates.format(endts)
        elif period == "year":
            startts = dates.parse(h, "%Y")
            endts = datetime(startts.year + 1, 1, 1)
            start = dates.format(startts)
            end = dates.format(endts)
        ranges.append((start, end))

    # for each user row, create `count` Provenance records in each period,
    # with the status appropriate to the user's role
    provs = []
    for row in desired_output:
        user = row[0]
        del row[0]
        role = role_map[user]
        for i in range(len(row)):
            count = row[i]
            start, end = ranges[i]
            status = None
            if role == "associate_editor":
                status = constants.APPLICATION_STATUS_COMPLETED
            elif role == "editor":
                status = constants.APPLICATION_STATUS_READY
            elif role == "admin":
                status = ADMIN_STATUSES[randint(0, len(ADMIN_STATUSES) - 1)]
            for j in range(count):
                p = Provenance()
                p.set_created(dates.random_date(start, end))
                p.user = user
                p.roles = [role]
                p.type = "suggestion"
                p.action = "status:" + status
                p.resource_id = uuid.uuid4().hex
                provs.append(p)
    return provs
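# Usage sketch for the spread builders in this extract (hypothetical data;
# "ProvenanceFixtureFactory" is an assumed name for the class these
# classmethods hang off). The desired_output table is a header row of period
# labels behind a corner cell, then one row per user with per-period counts:
def _example_status_spread():
    table = [
        ["User", "2015-01", "2015-02"],
        ["user1", 2, 0],
        ["user2", 1, 3],
    ]
    # 2 records for user1 and 4 for user2, each dated within its column's month
    return ProvenanceFixtureFactory.make_status_spread(
        table, "month", {"user1": "editor", "user2": "admin"})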
def content_reports(fr, to, outdir):
    # aggregate application (Suggestion) counts by year, then by country
    report = {}
    q = ContentByDate(fr, to)
    res = models.Suggestion.query(q=q.query())
    year_buckets = res.get("aggregations", {}).get("years", {}).get("buckets", [])
    for years in year_buckets:
        ds = years.get("key_as_string")
        do = dates.parse(ds)
        year = do.year
        if year not in report:
            report[year] = {}
        country_buckets = years.get("countries", {}).get("buckets", [])
        for country in country_buckets:
            cc = country.get("key")
            cn = datasets.get_country_name(cc)
            if cn not in report[year]:
                report[year][cn] = {}
            count = country.get("doc_count")
            report[year][cn]["count"] = count
            # report now has the shape {year: {country_name: {"count": n}}}

    table = _tabulate_time_entity_group(report, "Country")

    filename = "applications_by_year_by_country__" + _fft(fr) + "_to_" + _fft(to) + "__on_" + dates.today() + ".csv"
    outfiles = []
    outfile = os.path.join(outdir, filename)
    outfiles.append(outfile)
    with codecs.open(outfile, "wb", "utf-8") as f:
        writer = UnicodeWriter(f)
        for row in table:
            writer.writerow(row)

    return outfiles
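# Usage sketch (hypothetical values; the timestamp format is an assumption
# based on the dates.parse calls in this extract):
#   outfiles = content_reports("2015-01-01T00:00:00Z", "2016-01-01T00:00:00Z", "/tmp/reports")
#   # -> ["/tmp/reports/applications_by_year_by_country__..._to_...__on_....csv"]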
def date(self, val):
    # property setter (decorator elided in this extract): accept a date
    # string and store just its year; unparseable values are stored as None
    if val:
        try:
            parsed_date = dates.parse(val)
            val = parsed_date.year
        except ValueError:
            val = None
    self._date = val
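# Behaviour sketch (hypothetical values, assuming dates.parse accepts an
# ISO 8601 timestamp and raises ValueError on garbage):
#   obj.date = "2015-06-01T00:00:00Z"   # stores 2015
#   obj.date = "not a date"             # stores None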
def make_action_spread(cls, desired_output, action, period):
    # as make_status_spread above, but every record gets the same
    # caller-supplied action, and no role information
    desired_output = deepcopy(desired_output)
    header = desired_output[0]
    del desired_output[0]
    del header[0]

    ranges = []
    for h in header:
        start = None
        end = None
        if period == "month":
            startts = dates.parse(h, "%Y-%m")
            year, month = divmod(startts.month + 1, 12)
            if month == 0:
                month = 12
                year = year - 1
            endts = datetime(startts.year + year, month, 1)
            start = dates.format(startts)
            end = dates.format(endts)
        elif period == "year":
            startts = dates.parse(h, "%Y")
            endts = datetime(startts.year + 1, 1, 1)
            start = dates.format(startts)
            end = dates.format(endts)
        ranges.append((start, end))

    provs = []
    for row in desired_output:
        user = row[0]
        del row[0]
        for i in range(len(row)):
            count = row[i]
            start, end = ranges[i]
            for j in range(count):
                p = Provenance()
                p.set_created(dates.random_date(start, end))
                p.user = user
                p.type = "suggestion"
                p.action = action
                p.resource_id = uuid.uuid4().hex
                provs.append(p)
    return provs
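# Usage sketch (hypothetical; same table shape and assumed factory class as
# the status-spread example above, with a caller-chosen action string):
#   provs = ProvenanceFixtureFactory.make_action_spread(table, "edit", "month")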
def iterate(self, issn, since, to=None):
    # set the default value for to, if not already set
    if to is None:
        to = dates.now()

    # get the dates into datestamps
    sd = dates.parse(since)
    td = dates.parse(to)

    # calculate the ranges we're going to want to query by. We're going to
    # query EPMC one day at a time, so that we can effectively iterate through
    # in updated-date order (though within each day there will be no ordering;
    # there is little we can do about that except reduce the request
    # granularity further, which would massively increase the number of
    # requests)
    ranges = dates.day_ranges(sd, td)
    throttle = app.config.get("EPMC_HARVESTER_THROTTLE")

    last = None
    for fr, until in ranges:
        # throttle each day's request
        if last is not None and throttle is not None:
            diff = (datetime.utcnow() - last).total_seconds()
            app.logger.debug("Last day request at {x}, {y}s ago; throttle {z}s".format(x=last, y=diff, z=throttle))
            if diff < throttle:
                waitfor = throttle - diff
                app.logger.debug("Throttling EPMC requests for {x}s".format(x=waitfor))
                time.sleep(waitfor)

        # build the query for the OA articles in that ISSN for the specified
        # day (note we don't use the range, as the granularity in EPMC means
        # we'd double count; note also that we use date_sort=True as a weak
        # proxy for ordering by updated date: it actually orders by
        # publication date, which may partially coincide with updated date)
        query = queries.oa_issn_updated(issn, fr, date_sort=True)

        # also throttle the paging requests within the day
        for record in client.EuropePMC.complex_search_iterator(query, throttle=throttle):
            article = self.crosswalk(record)
            yield article, fr

        last = datetime.utcnow()
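# Consumption sketch (hypothetical: the harvester class name and the
# save/checkpoint helpers are illustrative, not from the source). The
# generator yields (article, day) pairs, so a caller can record the last
# fully-processed day and resume from it after a failure:
#   harvester = EPMCHarvester()
#   for article, day in harvester.iterate("1234-5678", "2015-01-01T00:00:00Z"):
#       save(article)       # persist the crosswalked article
#       checkpoint(day)     # remember how far we got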
def make_application_spread(cls, desired_output, period):
    # as the provenance spreads above, but builds Suggestion (application)
    # records keyed by country rather than by user
    desired_output = deepcopy(desired_output)
    header = desired_output[0]
    del desired_output[0]
    del header[0]

    ranges = []
    for h in header:
        start = None
        end = None
        if period == "month":
            startts = dates.parse(h, "%Y-%m")
            year, month = divmod(startts.month + 1, 12)
            if month == 0:
                month = 12
                year = year - 1
            endts = datetime(startts.year + year, month, 1)
            start = dates.format(startts)
            end = dates.format(endts)
        elif period == "year":
            startts = dates.parse(h, "%Y")
            endts = datetime(startts.year + 1, 1, 1)
            start = dates.format(startts)
            end = dates.format(endts)
        ranges.append((start, end))

    apps = []
    for row in desired_output:
        country = row[0]
        del row[0]
        for i in range(len(row)):
            count = row[i]
            start, end = ranges[i]
            for j in range(count):
                s = Suggestion()
                s.set_created(dates.random_date(start, end))
                s.bibjson().country = country
                apps.append(s)
    return apps
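# Usage sketch (hypothetical data; "ApplicationFixtureFactory" is an assumed
# class name, and the country codes are illustrative):
def _example_application_spread():
    table = [
        ["Country", "2015", "2016"],
        ["GB", 5, 2],
        ["FR", 0, 1],
    ]
    return ApplicationFixtureFactory.make_application_spread(table, "year")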
def test_04_timeout(self):
    source = JournalFixtureFactory.make_journal_source()
    j = models.Journal(**source)
    j.save()
    time.sleep(2)

    after = datetime.utcnow() + timedelta(seconds=2300)

    # set a lock with a longer timeout; it should expire after the marker above
    l = lock.lock("journal", j.id, "testuser", 2400)
    assert dates.parse(l.expires) > after
def stampify(val):
    # local helper; in_format is taken from the enclosing scope (elided in
    # this extract)
    return dates.parse(val, format=in_format)