def ingest(fn, journal, create, update, path_list):
    "wrapper around the import function with friendlier handling of problems"
    def _(path):
        try:
            results = fn(journal, path, create=create, update=update)
            LOG.debug("results of ingest", extra={'results': results})
            return True
        except KeyboardInterrupt:
            raise
        except BaseException:
            LOG.exception("failed to import article")
            return False
    try:
        lmap(_, path_list)
    except KeyboardInterrupt:
        print('caught interrupt')
        exit(1)
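# `lmap` (used throughout these functions) is a helper from this codebase's
# utils module. Assuming it is the usual strict wrapper around `map`, a
# minimal sketch would be:
def lmap(func, *iterables):
    "like `map`, but returns a list rather than a lazy iterator"
    return list(map(func, *iterables))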
def fix_broken_locs(stdout, stderr):
    # we have a bunch of (70+) articles with paths like:
    # /opt/bot-lax-adaptor/https:/s3-external-1.amazonaws.com/elife-publishing-expanded/24063.1/7ff76878-4424-48b0-be21-96f8bcfcb55c/elife-24063-v1.xml = /tmp/unpub-article-xml/elife-24063-v1.xml
    res = models.ArticleFragment.objects \
        .filter(type=models.XML2JSON) \
        .defer('fragment') # prevents lockup

    def fix(frag):
        if '-meta' not in frag.fragment:
            stderr.write('skipping %s, no meta found' % frag)
            return
        loc = frag.fragment['-meta']['location']
        bit = '/opt/bot-lax-adaptor/https:/'
        if loc.startswith(bit):
            newloc = 'https://' + loc[len(bit):]
            frag.fragment['-meta']['location'] = newloc
            frag.save()
            stderr.write('fixed: %s' % newloc)

    utils.lmap(fix, res.iterator())
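# A quick, self-contained illustration of the prefix surgery `fix` performs,
# using the example path from the comment above (assert is for demonstration):
broken = '/opt/bot-lax-adaptor/https:/s3-external-1.amazonaws.com/elife-publishing-expanded/24063.1/7ff76878-4424-48b0-be21-96f8bcfcb55c/elife-24063-v1.xml'
bit = '/opt/bot-lax-adaptor/https:/'
assert 'https://' + broken[len(bit):] == \
    'https://s3-external-1.amazonaws.com/elife-publishing-expanded/24063.1/7ff76878-4424-48b0-be21-96f8bcfcb55c/elife-24063-v1.xml'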
def handle(self, *args, **options):
    try:
        qid = options['qid']
        if qid:
            qid_list = [qid]
        else:
            qid_list = models.Query.objects.all().values_list('id', flat=True)
        if not qid_list:
            LOG.info("no query objects found, nothing to upload")
        else:
            fnargs = subdict(options, ['upload'])
            lmap(partial(self.snapshot_query, **fnargs), qid_list)
    except Exception as err:
        LOG.exception(err)
        self.echo(str(err))
        sys.exit(1)
    sys.exit(0)
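# `subdict` above is another assumed utils helper: select a subset of a dict
# by key, here to pass only the `upload` option through to `snapshot_query`.
# A sketch under that assumption:
def subdict(d, key_list):
    "returns a new dict containing only the keys of `d` named in `key_list`"
    return {k: v for k, v in d.items() if k in key_list}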
def test_article_can_be_ingested_many_times_before_publication(self):
    "before an article is published it can be ingested many times"
    cases = json1, json2, json3 = lmap(copy.deepcopy, [self.ajson] * 3)
    json2['article']['title'] = 'foo'
    json3['article']['title'] = 'bar'

    # iterate through the three different cases,
    # assert each case is different from the last
    prev_fragment = None
    for ajson in cases:
        av = ajson_ingestor.ingest(ajson)
        self.freshen(av)
        fragment = av.article.articlefragment_set.get(type=models.XML2JSON)
        if prev_fragment:
            self.assertNotEqual(prev_fragment.fragment, fragment.fragment)
        # track the fragment just ingested so the next case compares against it
        prev_fragment = fragment
def article_version_history(msid, only_published=True):
    "returns a list of snippets for the history of the given article"
    article = models.Article.objects.get(manuscript_id=msid)
    avl = article.articleversion_set.all()
    if only_published:
        avl = avl.exclude(datetime_published=None)
    if not avl.count():
        # no article versions available, fail
        raise models.Article.DoesNotExist()
    struct = {
        'received': date_received(article),
        'accepted': date_accepted(article),
        'versions': lmap(article_snippet_json, avl)
    }
    if article.type in EXCLUDE_RECEIVED_ACCEPTED_DATES:
        struct = exsubdict(struct, ['received', 'accepted'])
    return struct
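# `exsubdict` reads as the complement of `subdict`: everything *except* the
# named keys, used above to strip the received/accepted dates. A sketch,
# assuming that reading is correct:
def exsubdict(d, key_list):
    "returns a new dict containing every key of `d` *not* named in `key_list`"
    return {k: v for k, v in d.items() if k not in key_list}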
def relationships(msid, only_published=True):
    "returns all relationships for the given article"
    av = most_recent_article_version(msid, only_published)
    extr = relation_logic.external_relationships_for_article_version(av)
    intr = relation_logic.internal_relationships_for_article_version(av)

    # the internal relationships must be snippets of the latest version of that article
    def relation_snippet(art):
        try:
            return article_snippet_json(most_recent_article_version(art.manuscript_id, only_published))
        except models.Article.DoesNotExist:
            # reference to an article that could not be found!
            # it is either:
            # * a stub (hasn't finished production), or
            # * unpublished (finished production but unpublished)
            # neither is an error condition
            pass
    # drop the None results for articles that couldn't be resolved
    avl = lfilter(None, lmap(relation_snippet, intr))

    # pull the citation from each external relation
    extcl = [aver.citation for aver in extr]

    return extcl + avl
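# `lfilter`, like `lmap`, is presumably a strict (list-returning) wrapper,
# here around `filter`. `lfilter(None, ...)` then discards the `None`s that
# `relation_snippet` returns for unresolvable articles. A sketch:
def lfilter(func, iterable):
    "like `filter`, but returns a list rather than a lazy iterator"
    return list(filter(func, iterable))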
def relate_using_citation_list(av, citation_list):
    return lmap(partial(associate, av), citation_list)
def relate_using_msid_list(av, msid_list, quiet=False):
    return lmap(partial(relate_using_msid, av, quiet=quiet), msid_list)
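# Both wrappers above use the same currying idiom: `partial` pins the article
# version argument, then `lmap` applies the resulting one-argument function to
# each element of the list. A self-contained sketch of the idiom (`_relate`
# is hypothetical, for illustration only):
from functools import partial

def _relate(av, msid):
    return (av, msid)

assert list(map(partial(_relate, 'av1'), [1, 2, 3])) == [('av1', 1), ('av1', 2), ('av1', 3)]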