def find_earliest_oai_date(self, metadata):
    """
    Find the earliest valid publication date (if any) in a record.

    Every entry of ``metadata['date']`` is parsed with
    ``tolerant_datestamp_to_datetime``. Entries that fail to parse
    (``DatestampError`` or ``ValueError``) or that are rejected by
    ``valid_publication_date`` are skipped.

    :param metadata: mapping whose ``'date'`` entry is an iterable of
        datestamps
    :returns: the smallest accepted parsed date, or None if no entry
        was accepted
    """
    earliest = None
    for raw_date in metadata['date']:
        try:
            candidate = tolerant_datestamp_to_datetime(raw_date)
            usable = valid_publication_date(candidate)
            # Keep the candidate only when it beats the current minimum.
            if usable and (earliest is None or candidate < earliest):
                earliest = candidate
        except (DatestampError, ValueError):
            # An unparseable datestamp is simply ignored.
            continue
    return earliest
def parse_crossref_date(date):
    """
    Parse the date representation from CrossRef to a python object.

    The ``'date-parts'`` entry is tried first: each of its items is handed
    to ``date_from_dateparts`` and the first non-None result is returned.
    If that yields nothing (or raises ``ValueError``), the ``'raw'`` entry
    is parsed with ``tolerant_datestamp_to_datetime`` instead.

    :param date: CrossRef date mapping, possibly holding ``'date-parts'``
        and/or ``'raw'``; a falsy value (None, empty dict) gives None
    :returns: the parsed date, or None if nothing could be parsed
    :raises: errors raised while parsing the ``'raw'`` entry are not
        caught here and propagate to the caller
    """
    if not date:
        return None

    if 'date-parts' in date:
        try:
            # The loop variable must not be called ``date``: rebinding the
            # parameter would make the 'raw' lookup below inspect the last
            # date-parts item instead of the CrossRef mapping.
            for parts in date['date-parts']:
                parsed = date_from_dateparts(parts)
                if parsed is not None:
                    return parsed
        except ValueError:
            # Malformed date parts: fall back to the raw representation.
            pass

    if 'raw' in date:
        return tolerant_datestamp_to_datetime(date['raw']).date()
    return None
def parse_crossref_date(date):
    """
    Parse the date representation from CrossRef to a python object.

    The ``'date-parts'`` entry is tried first: each of its items is handed
    to ``date_from_dateparts``. A result equal to 1970-01-01 is treated as
    missing, and the first remaining non-None result is returned. If that
    yields nothing (or raises ``ValueError``), the ``'raw'`` entry is
    parsed with ``tolerant_datestamp_to_datetime`` and returned only if
    ``valid_publication_date`` accepts it.

    :param date: CrossRef date mapping, possibly holding ``'date-parts'``
        and/or ``'raw'``; a falsy value gives None
    :returns: the parsed date, or None if nothing usable was found
    :raises: errors raised while parsing the ``'raw'`` entry are not
        caught here and propagate to the caller
    """
    if not date:
        return None

    ret = None
    if 'date-parts' in date:
        epoch = datetime.date(year=1970, month=1, day=1)
        try:
            # The loop variable must not be called ``date``: rebinding the
            # parameter would make the 'raw' lookup below inspect the last
            # date-parts item instead of the CrossRef mapping.
            for parts in date['date-parts']:
                ret = date_from_dateparts(parts)
                if ret == epoch:
                    # The epoch is discarded rather than reported as a
                    # publication date.
                    ret = None
                if ret is not None:
                    return ret
        except ValueError:
            # Malformed date parts: fall back to the raw representation.
            pass

    if 'raw' in date:
        ret = tolerant_datestamp_to_datetime(date['raw']).date()
    if valid_publication_date(ret):
        return ret
    return None
def _parse_date(cls, data):
    """
    Turn a citeproc date representation into a date object.

    The first item of ``data['date-parts']`` is tried first through
    ``cls._parse_date_parts``; any failure there is ignored. If that gave
    nothing and a ``'raw'`` value is present, the raw value is parsed with
    ``tolerant_datestamp_to_datetime``. The outcome is kept only if
    ``valid_publication_date`` accepts it.

    :param data: date extracted from citeproc
    :returns: date object or None
    """
    if not isinstance(data, dict):
        return None

    # Structured date parts take precedence over the raw string.
    try:
        parsed = cls._parse_date_parts(data.get('date-parts')[0])
    except Exception:
        parsed = None

    # Fall back to the raw date only when the parts produced nothing.
    if parsed is None and data.get('raw') is not None:
        parsed = tolerant_datestamp_to_datetime(data['raw']).date()

    # Anything that is not a valid publication date is discarded.
    return parsed if valid_publication_date(parsed) else None
def api_paper_query(request):
    """
    Look up a paper from a JSON query, either by DOI or by metadata.

    The request body must be a UTF-8 encoded JSON object. If it carries a
    truthy ``doi`` field, the paper is fetched with ``Paper.get_by_doi``
    and, if that returns nothing, with ``Paper.create_by_doi``. Otherwise
    the ``title``, ``date`` and ``authors`` fields are validated, a
    ``BarePaper`` is built from them and its fingerprint is looked up
    through ``Paper.objects``.

    :param request: object whose ``body`` attribute holds the raw payload
    :returns: ``{'status': 'ok', 'paper': ...}`` when a paper is found, or
        the tuple ``({'status': 'not found'}, 404)`` when the fingerprint
        lookup matches nothing
    :raises BadRequest: if the payload or one of its fields is invalid, or
        if no paper could be obtained for the given DOI
    """
    try:
        fields = json.loads(request.body.decode('utf-8'))
    except (ValueError, UnicodeDecodeError):
        raise BadRequest('Invalid JSON payload')
    # Valid JSON may also be a list, string, number or null. Those carry no
    # fields and would otherwise crash on ``.get`` below.
    if not isinstance(fields, dict):
        raise BadRequest('Invalid JSON payload')

    doi = fields.get('doi')
    if doi:
        p = None
        try:
            p = Paper.get_by_doi(doi)
            if not p:
                p = Paper.create_by_doi(doi)
        except MetadataSourceException:
            # Reported below as "not found", like a missing paper.
            pass
        if p is None:
            raise BadRequest('Could not find a paper with this DOI')
        return {'status': 'ok', 'paper': p.json()}

    title = fields.get('title')
    if not isinstance(title, str) or not title or len(title) > 512:
        raise BadRequest(
            'Invalid title, has to be a non-empty string shorter than 512 characters')

    date = fields.get('date')
    if not isinstance(date, str):
        raise BadRequest('A date is required')
    try:
        date = tolerant_datestamp_to_datetime(date)
    except ValueError as e:
        raise BadRequest(str(e))

    authors = fields.get('authors')
    if not isinstance(authors, list):
        raise BadRequest('A list of authors is expected')
    if not authors:
        raise BadRequest('No authors provided')

    parsed_authors = []
    for a in authors:
        if not isinstance(a, dict):
            raise BadRequest('Invalid author')
        if 'first' in a and 'last' in a:
            # The first name may be empty, the last name may not.
            if (not isinstance(a['first'], str)
                    or not isinstance(a['last'], str)
                    or not a['last']):
                raise BadRequest('Invalid (first,last) name provided')
            author = (a['first'], a['last'])
        elif 'plain' in a:
            if not isinstance(a['plain'], str) or not a['plain']:
                raise BadRequest('Invalid plain name provided')
            author = parse_comma_name(a['plain'])
        else:
            author = None
        if author is None:
            raise BadRequest('Invalid author')
        parsed_authors.append(BareName.create(author[0], author[1]))

    try:
        # Validate the metadata against our data model,
        # and compute the fingerprint to look up the paper in the DB.
        # This does NOT create a paper in the database - we do not want
        # to create papers for every search query we get!
        p = BarePaper.create(title, parsed_authors, date)
    except ValueError as e:
        raise BadRequest('Invalid paper: {}'.format(e))

    try:
        model_paper = Paper.objects.get(fingerprint=p.fingerprint)
    except Paper.DoesNotExist:
        return {'status': 'not found'}, 404
    return {'status': 'ok', 'paper': model_paper.json()}
def tolerant_datestamp_to_datetime(self):
    # Datestamps that must parse, paired with the expected datetime.
    accepted = [
        ('2016-02-11T18:34:12Z', datetime.datetime(2016, 2, 11, 18, 34, 12)),
        ('2016-02-11', datetime.datetime(2016, 2, 11, 0, 0)),
        ('2016/02/11', datetime.datetime(2016, 2, 11, 0, 0)),
        ('2016-02', datetime.datetime(2016, 2, 1, 0, 0)),
        ('2016', datetime.datetime(2016, 1, 1, 0, 0)),
    ]
    for stamp, expected in accepted:
        self.assertEqual(tolerant_datestamp_to_datetime(stamp), expected)

    # Datestamps that must be rejected with a ValueError.
    rejected = [
        '2016-02-11T18:34:12',  # Z needed
        '2016-02-11-3',  # too many numbers
        '2016-02-11T18:37:09:38',  # too many numbers
        '20151023371',
        '2014T',
    ]
    for stamp in rejected:
        with self.assertRaises(ValueError):
            tolerant_datestamp_to_datetime(stamp)
def api_paper_query(request):
    """
    Answer a paper query given as a JSON object in the request body.

    Two query modes are supported:

    * by DOI: when the object has a truthy ``doi`` field, the paper is
      obtained from ``Paper.get_by_doi`` or, if that returns nothing,
      from ``Paper.create_by_doi``;
    * by metadata: otherwise ``title``, ``date`` and ``authors`` are
      validated, a ``BarePaper`` is built from them, and a ``Paper`` with
      the same fingerprint is searched through ``Paper.objects``.

    :param request: object whose ``body`` attribute holds the raw payload
    :returns: ``{'status': 'ok', 'paper': ...}`` on success, or the tuple
        ``({'status': 'not found'}, 404)`` if no paper has the computed
        fingerprint
    :raises BadRequest: on an undecodable or non-object payload, on any
        invalid field, or when the DOI lookup yields no paper
    """
    try:
        fields = json.loads(request.body.decode('utf-8'))
    except (ValueError, UnicodeDecodeError):
        raise BadRequest('Invalid JSON payload')
    # json.loads also accepts arrays and scalars. Only an object has the
    # fields read below, so anything else is a client error, not a crash.
    if not isinstance(fields, dict):
        raise BadRequest('Invalid JSON payload')

    doi = fields.get('doi')
    if doi:
        p = None
        try:
            p = Paper.get_by_doi(doi)
            if not p:
                p = Paper.create_by_doi(doi)
        except MetadataSourceException:
            # Handled by the ``p is None`` check just below.
            pass
        if p is None:
            raise BadRequest('Could not find a paper with this DOI')
        return {'status': 'ok', 'paper': p.json()}

    title = fields.get('title')
    if not isinstance(title, str) or not title or len(title) > 512:
        raise BadRequest(
            'Invalid title, has to be a non-empty string shorter than 512 characters'
        )

    date = fields.get('date')
    if not isinstance(date, str):
        raise BadRequest('A date is required')
    try:
        date = tolerant_datestamp_to_datetime(date)
    except ValueError as e:
        raise BadRequest(str(e))

    authors = fields.get('authors')
    if not isinstance(authors, list):
        raise BadRequest('A list of authors is expected')
    if not authors:
        raise BadRequest('No authors provided')

    parsed_authors = []
    for a in authors:
        if not isinstance(a, dict):
            raise BadRequest('Invalid author')
        if 'first' in a and 'last' in a:
            first, last = a['first'], a['last']
            # An empty first name is accepted, an empty last name is not.
            if not isinstance(first, str) or not isinstance(last, str) or not last:
                raise BadRequest('Invalid (first,last) name provided')
            author = (first, last)
        elif 'plain' in a:
            plain = a['plain']
            if not isinstance(plain, str) or not plain:
                raise BadRequest('Invalid plain name provided')
            author = parse_comma_name(plain)
        else:
            author = None
        if author is None:
            raise BadRequest('Invalid author')
        parsed_authors.append(BareName.create(author[0], author[1]))

    try:
        # Validate the metadata against our data model,
        # and compute the fingerprint to look up the paper in the DB.
        # This does NOT create a paper in the database - we do not want
        # to create papers for every search query we get!
        p = BarePaper.create(title, parsed_authors, date)
    except ValueError as e:
        raise BadRequest('Invalid paper: {}'.format(e))

    try:
        model_paper = Paper.objects.get(fingerprint=p.fingerprint)
    except Paper.DoesNotExist:
        return {'status': 'not found'}, 404
    return {'status': 'ok', 'paper': model_paper.json()}
def api_paper_query(request):
    """
    Build a bare paper from a JSON query, either by DOI or by metadata.

    The request body must be a UTF-8 encoded JSON object. If it carries a
    truthy ``doi`` field, the paper comes from
    ``Paper.create_by_doi(doi, bare=True)``. Otherwise the ``title``,
    ``date`` and ``authors`` fields are validated and passed to
    ``BarePaper.create``.

    :param request: object whose ``body`` attribute holds the raw payload
    :returns: ``{'status': 'ok', 'paper': ...}`` with the JSON form of the
        paper
    :raises BadRequest: if the payload or one of its fields is invalid, or
        if no paper could be obtained for the given DOI
    """
    try:
        fields = json.loads(request.body.decode('utf-8'))
    except (ValueError, UnicodeDecodeError):
        raise BadRequest('Invalid JSON payload')
    # Valid JSON may also be a list, string, number or null. Those carry no
    # fields and would otherwise crash on ``.get`` below.
    if not isinstance(fields, dict):
        raise BadRequest('Invalid JSON payload')

    doi = fields.get('doi')
    if doi:
        p = None
        try:
            p = Paper.create_by_doi(doi, bare=True)
        except MetadataSourceException:
            # Reported below as "not found", like a missing paper.
            pass
        if p is None:
            raise BadRequest('Could not find a paper with this DOI')
        return {'status': 'ok', 'paper': p.json()}

    title = fields.get('title')
    if not isinstance(title, unicode) or not title or len(title) > 512:
        raise BadRequest(
            'Invalid title, has to be a non-empty string shorter than 512 characters'
        )

    date = fields.get('date')
    if not isinstance(date, unicode):
        raise BadRequest('A date is required')
    try:
        date = tolerant_datestamp_to_datetime(date)
    except ValueError as e:
        raise BadRequest(unicode(e))

    authors = fields.get('authors')
    if not isinstance(authors, list):
        raise BadRequest('A list of authors is expected')
    if not authors:
        raise BadRequest('No authors provided')

    parsed_authors = []
    for a in authors:
        if not isinstance(a, dict):
            raise BadRequest('Invalid author')
        if 'first' in a and 'last' in a:
            # The first name may be empty, the last name may not.
            if (not isinstance(a['first'], unicode)
                    or not isinstance(a['last'], unicode)
                    or not a['last']):
                raise BadRequest('Invalid (first,last) name provided')
            author = (a['first'], a['last'])
        elif 'plain' in a:
            if not isinstance(a['plain'], unicode) or not a['plain']:
                raise BadRequest('Invalid plain name provided')
            author = parse_comma_name(a['plain'])
        else:
            author = None
        if author is None:
            raise BadRequest('Invalid author')
        parsed_authors.append(BareName.create(author[0], author[1]))

    try:
        p = BarePaper.create(title, parsed_authors, date)
    except ValueError:
        raise BadRequest('Invalid paper')

    return {'status': 'ok', 'paper': p.json()}
def api_paper_query(request):
    """
    Build a bare paper from a JSON query and fetch its accessibility.

    The request body must be a UTF-8 encoded JSON object. If it carries a
    truthy ``doi`` field, the paper comes from
    ``Paper.create_by_doi(doi, bare=True)`` and is returned directly.
    Otherwise the ``title``, ``date`` and ``authors`` fields are validated,
    a ``BarePaper`` is created from them and passed through
    ``OaiPaperSource.fetch_accessibility`` before being returned.

    :param request: object whose ``body`` attribute holds the raw payload
    :returns: ``{'status': 'ok', 'paper': ...}`` with the JSON form of the
        paper
    :raises BadRequest: if the payload or one of its fields is invalid, or
        if no paper could be obtained for the given DOI
    """
    try:
        fields = json.loads(request.body.decode('utf-8'))
    except (ValueError, UnicodeDecodeError):
        raise BadRequest('Invalid JSON payload')
    # Valid JSON may also be a list, string, number or null. Those carry no
    # fields and would otherwise crash on ``.get`` below.
    if not isinstance(fields, dict):
        raise BadRequest('Invalid JSON payload')

    doi = fields.get('doi')
    if doi:
        p = None
        try:
            p = Paper.create_by_doi(doi, bare=True)
        except MetadataSourceException:
            # Reported below as "not found", like a missing paper.
            pass
        if p is None:
            raise BadRequest('Could not find a paper with this DOI')
        return {'status': 'ok', 'paper': p.json()}

    title = fields.get('title')
    if not isinstance(title, unicode) or not title or len(title) > 512:
        raise BadRequest('Invalid title, has to be a non-empty string shorter than 512 characters')

    date = fields.get('date')
    if not isinstance(date, unicode):
        raise BadRequest('A date is required')
    try:
        date = tolerant_datestamp_to_datetime(date)
    except ValueError as e:
        raise BadRequest(unicode(e))

    authors = fields.get('authors')
    if not isinstance(authors, list):
        raise BadRequest('A list of authors is expected')
    if not authors:
        raise BadRequest('No authors provided')

    parsed_authors = []
    for a in authors:
        if not isinstance(a, dict):
            raise BadRequest('Invalid author')
        if 'first' in a and 'last' in a:
            # The first name may be empty, the last name may not.
            if (not isinstance(a['first'], unicode)
                    or not isinstance(a['last'], unicode)
                    or not a['last']):
                raise BadRequest('Invalid (first,last) name provided')
            author = (a['first'], a['last'])
        elif 'plain' in a:
            if not isinstance(a['plain'], unicode) or not a['plain']:
                raise BadRequest('Invalid plain name provided')
            author = parse_comma_name(a['plain'])
        else:
            author = None
        if author is None:
            raise BadRequest('Invalid author')
        parsed_authors.append(BareName.create(author[0], author[1]))

    try:
        p = BarePaper.create(title, parsed_authors, date)
    except ValueError:
        raise BadRequest('Invalid paper')

    # Imported at call time, as in the original code.
    import backend.oai as oai
    oaisource = oai.OaiPaperSource(max_results=10)
    p = oaisource.fetch_accessibility(p)
    return {'status': 'ok', 'paper': p.json()}