def calculate_citation_history_coordinates(recid):
    """Return a list of citation graph coordinates for RECID, sorted by year.

    The x coordinates (years) come from
    calculate_citation_graphe_x_coordinates(); every year starts with a
    count of zero and is incremented once for each citing record whose
    year falls on one of those coordinates.

    @param recid: identifier of the record whose citation history is wanted
    @return: list of (year, number_of_citations) tuples sorted by year, or
        an empty list when there are fewer than
        CFG_BIBRANK_CITATION_HISTORY_MIN_X_POINTS x coordinates
    """
    result = dict.fromkeys(calculate_citation_graphe_x_coordinates(recid), 0)

    if len(result) < CFG_BIBRANK_CITATION_HISTORY_MIN_X_POINTS:
        # Do not generate graphs that have fewer than X points
        return []

    # A distinct loop name keeps the RECID argument intact while iterating
    for citing_recid in get_cited_by(recid):
        rec_date = get_record_year(citing_recid)
        # Some records simply do not have these fields
        if not rec_date:
            continue
        # rec_date[0][:4] may contain a typo and not be a valid year
        try:
            d = strptime(rec_date[0][:4], '%Y')
        except ValueError:
            continue
        # Years outside the precomputed x coordinates are ignored
        if d.year in result:
            result[d.year] += 1

    # items() behaves the same on Python 2 and, unlike iteritems(), also
    # exists on Python 3
    return sorted(result.items())
def parse_date_for_googlescholar(datetime_string):
    """
    Parse (guess) and return the date in a format adequate for Google Scholar.

    We don't use dateutils.guess_datetime() as this one might lead to
    results not accurate enough.

    The string is first tried as is against every format in
    CFG_POSSIBLE_DATE_FORMATS, then again after month names matched by
    CFG_MONTHS_I18N_PATTERN_RE have been translated through
    CFG_MONTH_NAMES_MAPPING.

    @param datetime_string: the date string to interpret
    @return: the date formatted as '%Y/%m/%d' when one of the known formats
        matches; otherwise group 3 of the first CFG_YEAR_PATTERN_RE match
        found in the string; otherwise the empty string
    """
    datetime_string = CFG_PUNCTUATION_PATTERN_RE.sub(' ', datetime_string)
    datetime_string = CFG_SPACES_PATTERN_RE.sub(' ', datetime_string)

    def replace_month(match_obj):
        "Return translated month in the matching object"
        month = match_obj.group(2).strip()
        return match_obj.group(1) + \
               CFG_MONTH_NAMES_MAPPING.get(month.upper(), month) + \
               match_obj.group(3)

    def parse_with_known_formats(candidate):
        "Return CANDIDATE parsed with the first matching format, else None"
        candidate = candidate.strip()
        for dateformat in CFG_POSSIBLE_DATE_FORMATS:
            try:
                return strptime(candidate, dateformat)
            except ValueError:
                # Format does not match: try the next one
                continue
        return None

    parsed_datetime = parse_with_known_formats(datetime_string)
    if not parsed_datetime:
        # Do it all again, with the translated version of the string
        translated_datetime_string = CFG_MONTHS_I18N_PATTERN_RE.sub(
            replace_month, datetime_string)
        parsed_datetime = parse_with_known_formats(translated_datetime_string)

    if parsed_datetime:
        return strftime('%Y/%m/%d', parsed_datetime)

    # Look for a year inside the string
    year_match = CFG_YEAR_PATTERN_RE.search(datetime_string)
    if year_match is None:
        return ''
    return year_match.group(3)
def parse_date_for_googlescholar(datetime_string):
    """
    Parse (guess) and return the date in a format adequate for Google Scholar.

    We don't use dateutils.guess_datetime() as this one might lead to
    results not accurate enough.

    The string is first tried as is against every format in
    CFG_POSSIBLE_DATE_FORMATS, then again after month names matched by
    CFG_MONTHS_I18N_PATTERN_RE have been translated through
    CFG_MONTH_NAMES_MAPPING.

    @param datetime_string: the date string to interpret
    @return: the date formatted as '%Y/%m/%d' when one of the known formats
        matches; otherwise group 3 of the first CFG_YEAR_PATTERN_RE match
        found in the string; otherwise the empty string
    """
    datetime_string = CFG_PUNCTUATION_PATTERN_RE.sub(' ', datetime_string)
    datetime_string = CFG_SPACES_PATTERN_RE.sub(' ', datetime_string)

    def replace_month(match_obj):
        "Return translated month in the matching object"
        month = match_obj.group(2).strip()
        return match_obj.group(1) + \
               CFG_MONTH_NAMES_MAPPING.get(month.upper(), month) + \
               match_obj.group(3)

    def parse_with_known_formats(candidate):
        "Return CANDIDATE parsed with the first matching format, else None"
        candidate = candidate.strip()
        for dateformat in CFG_POSSIBLE_DATE_FORMATS:
            try:
                return strptime(candidate, dateformat)
            except ValueError:
                # Format does not match: try the next one
                continue
        return None

    parsed_datetime = parse_with_known_formats(datetime_string)
    if not parsed_datetime:
        # Do it all again, with the translated version of the string
        translated_datetime_string = CFG_MONTHS_I18N_PATTERN_RE.sub(
            replace_month, datetime_string)
        parsed_datetime = parse_with_known_formats(translated_datetime_string)

    if parsed_datetime:
        return strftime('%Y/%m/%d', parsed_datetime)

    # Look for a year inside the string
    year_match = CFG_YEAR_PATTERN_RE.search(datetime_string)
    if year_match is None:
        return ''
    return year_match.group(3)
def _sort_dates(self, val): """ Convert: '8 nov 2010' => '2010-11-08' 'nov 2010' => '2010-11-01' '2010' => '2010-01-01' """ datetext_format = "%Y-%m-%d" try: datestruct = strptime(val, datetext_format) except ValueError: try: datestruct = strptime(val, "%d %b %Y") except ValueError: try: datestruct = strptime(val, "%b %Y") except ValueError: try: datestruct = strptime(val, "%Y") except ValueError: return val return strftime(datetext_format, datestruct)
def format_element(bfo, place_label, publisher_label, date_label,
                   separator=', ', date_format=""):
    """
    Print imprint (Order: Name of publisher, place of publication and date of publication).

    Empty values, as well as the placeholders "sine nomine" (publisher) and
    "sine loco" (place), are left out together with their label, and the
    separator is only printed between the elements that are present.

    Parameter <code>date_format</code> allows to specify the string
    representation of the output. It is applied when the date has the form
    YYYY-MM-DD; any other date is printed as found in the record.

    The format string has the same behaviour as the strftime() function::
        <pre>Eg: 1982-09-24 07:32:00
             "%d %B %Y"   -> 24 September 1982
             "%I:%M"      -> 07:32
        </pre>

    @param separator: a separator between the elements of imprint
    @param place_label: a label to print before the publication place value
    @param publisher_label: a label to print before the publisher name
    @param date_label: a label to print before the publication date
    @param date_format: date format
    @see: place.py, publisher.py, date.py, reprints.py, pagination.py
    """
    place = bfo.field('260__a')
    publisher = bfo.field('260__b')
    date = bfo.field('260__c')

    parts = []

    if publisher and publisher != "sine nomine":
        parts.append(publisher_label + ' ' + publisher)

    if place and place != "sine loco":
        parts.append(place_label + ' ' + place)

    if date:
        printed_date = date
        if date_format:
            try:
                printed_date = strftime(date_format,
                                        strptime(date, "%Y-%m-%d"))
            except ValueError:
                # Not a YYYY-MM-DD date: keep the raw value
                pass
        parts.append(date_label + ' ' + printed_date)

    # Joining avoids a dangling separator when trailing elements are missing
    return separator.join(parts)
def format_element(bfo, date_format='%d %B %Y', source_formats='%Y-%m-%d',
                   source_fields="260__c", guess_source_format="no",
                   ignore_date_format_for_year_only="yes"):
    """
    Prints the imprint publication date.

    Parameter <code>date_format</code> allows to specify the string
    representation of the output.

    The format string has the same behaviour as the strftime() function:
        <pre>Eg: 1982-09-24 07:32:00
            "%d %B %Y"   -> 24 September 1982
            "%I:%M"      -> 07:32
        </pre>

    Note that if input date is simply a year (4 digits), it is
    returned as such if <code>ignore_date_format_for_year_only</code>
    is set to 'yes', regardless of <code>date_format</code>.

    Parameter <code>source_formats</code> allows to specify the
    expected format of the date in the metadata. If the format does
    not match, the date cannot be parsed, and cannot be formatted
    according to <code>date_format</code>. Comma-separated values can
    be provided in order to test several input formats.

    Parameter <code>source_fields</code> defines the list of MARC
    fields where we would like to retrieve the date. First one
    matching <code>source_formats</code> is used. If none, fall back to
    first non-empty one.

    Parameter <code>guess_source_format</code> when set to 'yes'
    allows to guess the date source format.

    @see: pagination.py, publisher.py, reprints.py, imprint.py, place.py
    @param date_format: output date format.
    @param source_formats: expected (comma-separated values) input date format.
    @param source_fields: the MARC fields (comma-separated values) to look up
                   for the date. First non-empty one is used.
    @param guess_source_format: if 'yes', ignore 'source_format' and
                         try to guess format using Python mxDateTime module.
    @param ignore_date_format_for_year_only: if 'yes', ignore 'date_format' when the
                         metadata in the record contains a single year (4 digits).
    """
    guess_source_format_p = guess_source_format.lower() == 'yes'
    ignore_date_format_for_year_only_p = \
        ignore_date_format_for_year_only.lower() == 'yes'
    source_marc_fields = [source_marc_field.strip()
                          for source_marc_field in source_fields.split(',')]
    source_formats = [source_format.strip()
                      for source_format in source_formats.split(',')]

    parsed_datetime_value = None
    first_matched_raw_date = ''
    for source_marc_field in source_marc_fields:
        date_value = bfo.field(source_marc_field)
        if not date_value:
            continue

        # Remember the first non-empty value as the fallback output
        if not first_matched_raw_date:
            first_matched_raw_date = date_value

        if (ignore_date_format_for_year_only_p and
                date_value.isdigit() and len(date_value) == 4):
            # Year. Return as such
            return date_value

        if guess_source_format_p:
            try:
                parsed_datetime_value = guess_datetime(date_value)
            except Exception:
                # Best effort: the exceptions raised by guess_datetime()
                # are not visible from here, so any failure simply moves
                # on to the next field. KeyboardInterrupt and SystemExit
                # are no longer swallowed.
                pass
            else:
                break
        else:
            for source_format in source_formats:
                try:
                    parsed_datetime_value = strptime(date_value,
                                                     source_format)
                    break
                except ValueError:
                    # This format does not match: try the next one
                    pass

        if parsed_datetime_value:
            # We have correctly parsed one date!
            break

    if parsed_datetime_value:
        return strftime(date_format, parsed_datetime_value)
    return first_matched_raw_date