def write_csv(req, dictionary, journal_list, f_date, t_date,
              created_or_modified_date):
    return_val = ''

    for key in journal_list:
        val = dictionary[key]
        papers = perform_request_search(p="date%s:%s->%s" %
                                        (created_or_modified_date,
                                         f_date, t_date),
                                        c=val)
        if papers == []:
            continue

        return_val += key + '\n'
        return_val += ';'.join(['recid', 'cr. date', 'mod. date', 'DOI',
                                'XML', 'PDF', 'PDF/A', 'Complete record?',
                                'arXiv number', 'Copyright: authors', 'CC-BY',
                                'Funded by SCOAP3', 'arXiv category', 'notes',
                                'First delivery', 'First AB delivery',
                                'Last modification', 'PDF/A upload',
                                'DOI registration', 'Delivery diff',
                                'PDF/A diff']) + '\n'

        for recid in papers:
            rec = get_record(recid)
            doi = get_doi(rec)
            first_del, first_ab_del, last_mod, doi_reg, pdfa_del = \
                get_delivery_data(recid, doi)
            record_compl = is_complete_record(recid)
            return_val += ';'.join(str(item) for item in [
                str(recid),
                get_creation_date(recid),
                get_modification_date(recid),
                doi,
                has_or_had_format(recid, '.xml').lstrip('<b>').rstrip('</b>'),
                has_or_had_format(recid, '.pdf').lstrip('<b>').rstrip('</b>'),
                has_or_had_format(recid, '.pdf;pdfa').lstrip('<b>').rstrip('</b>'),
                str(check_complete_rec(record_compl)),
                get_arxiv(rec).lstrip('<b>').rstrip('</b>'),
                is_compliant(recid, 'authors').lstrip('<b>').rstrip('</b>'),
                is_compliant(recid, 'cc').lstrip('<b>').rstrip('</b>'),
                is_compliant(recid, 'scoap3').lstrip('<b>').rstrip('</b>'),
                is_compliant(recid, 'category').lstrip('<b>').rstrip('</b>'),
                str([rec_key for rec_key, rec_val
                     in record_compl.iteritems() if not rec_val]),
                str(first_del),
                str(first_ab_del),
                str(last_mod),
                str(pdfa_del),
                str(doi_reg),
                check_24h_delivery(first_del, doi_reg),
                check_24h_delivery(pdfa_del, doi_reg)])
            return_val += '\n'

    return return_val
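# Usage sketch for write_csv() above (illustrative only; the JOURNALS mapping
# and the mod_python `req` handler are assumptions based on the surrounding
# code, not part of this function's contract):
#
#     def csv_report(req):
#         req.content_type = 'text/csv; charset=utf-8'
#         # 'created' selects a datecreated:... query,
#         # 'modified' a datemodified:... query
#         req.write(write_csv(req, JOURNALS, sorted(JOURNALS.keys()),
#                             '2013-01-01', '2013-12-31', 'created'))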
def index(req):
    req.content_type = 'text/csv; charset=utf-8'
    req.headers_out['content-disposition'] = ('attachment; '
                                              'filename=scoap3_records_info.csv')
    req.write("SCOAP3 record id; Journal; Creation date; Modification date; "
              "Title; Authors; Publication info\n")
    for key, value in JOURNALS.iteritems():
        recids = perform_request_search(c=value)
        for recid in recids:
            rec = get_record(recid)
            title = rec['245'][0][0][0][1].strip()
            creation_date = get_creation_date(recid)
            modification_date = get_modification_date(recid)
            authors = rec['100'][0][0][0][1]
            if '700' in rec:
                for author in rec['700']:
                    authors += ' / %s' % (author[0][0][1],)
            publication_info = ''
            if '733' in rec:
                publication_info += "%s %s (%s) %s" % (rec['733'][0][0][0][1],
                                                       rec['733'][0][0][1][1],
                                                       rec['733'][0][0][2][1],
                                                       rec['733'][0][0][3][1])
            if '024' in rec:
                publication_info += " %s" % (rec['024'][0][0][0][1],)
            if '037' in rec:
                publication_info += " %s" % (rec['037'][0][0][0][1],)
            # mod_python request objects expose write(), not writeline()
            req.write("%s; %s; %s; %s; %s; %s; %s\n"
                      % (recid, value, creation_date, modification_date,
                         title, authors, publication_info))
def create_latest_additions_info(self, rg=CFG_WEBSEARCH_INSTANT_BROWSE,
                                 ln=CFG_SITE_LANG):
    """
    Create info about latest additions that will be used for
    create_instant_browse() later.
    """
    self.latest_additions_info = []
    if self.nbrecs and self.reclist:
        # firstly, get last 'rg' records:
        recIDs = list(self.reclist)

        # FIXME: temporary hack in order to display tweaked latest
        # additions box for some CERN collections:
        if CFG_CERN_SITE:
            this_year = time.strftime("%Y", time.localtime())
            if self.name in ['CERN Yellow Reports']:
                last_year = str(int(this_year) - 1)
                # detect recIDs only from this and past year:
                recIDs = list(self.reclist &
                              search_pattern(p='year:%s or year:%s' %
                                             (this_year, last_year)))
            elif self.name in ['Videos']:
                # detect recIDs only from this year:
                recIDs = list(self.reclist &
                              search_pattern(p='year:%s' % this_year))

        total = len(recIDs)
        to_display = min(rg, total)

        for idx in range(total - 1, total - to_display - 1, -1):
            recid = recIDs[idx]
            self.latest_additions_info.append(
                {'id': recid,
                 'format': format_record(recid, "hb", ln=ln),
                 'date': get_creation_date(recid, fmt="%Y-%m-%d<br />%H:%i")})
    return
def get_record_checks(req, recids):
    if recids == '':
        return ''
    recids = recids.split(',')
    return_val = []
    for rid in recids:
        try:
            recid = int(rid)
            rec = get_record(recid)
            doi = get_doi(rec)
            record_compl = is_complete_record(recid)
            return_val.append("""<tr>
                <td><a href="%s">%i</a></td>
                <td>%s</td>
                <td>%s</td>
                <td><a href="http://dx.doi.org/%s">%s</a></td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                </tr>""" % (join(CFG_SITE_URL, 'record', str(recid)),
                            recid,
                            get_creation_date(recid),
                            get_modification_date(recid),
                            doi, doi,
                            has_or_had_format(recid, '.xml'),
                            has_or_had_format(recid, '.pdf'),
                            has_or_had_format(recid, '.pdf;pdfa'),
                            check_complete_rec(record_compl),
                            get_arxiv(rec),
                            is_compliant(recid, "authors"),
                            is_compliant(recid, "cc"),
                            is_compliant(recid, "scoap3"),
                            str([rec_key for rec_key, rec_val
                                 in record_compl.iteritems() if not rec_val])))
        except:
            # non-numeric entries mark a journal section: emit a section
            # heading followed by the column header row
            recid = rid
            return_val.append("""<tr><th colspan="13" align="left">
                <h2>%s</h2></th></tr>""" % (recid,))
            return_val.append("""<tr>
                <th>recid</th>
                <th>cr. date</th>
                <th>mod. date</th>
                <th>DOI</th>
                <th>XML</th>
                <th>PDF</th>
                <th>PDF/A</th>
                <th>Complete record?</th>
                <th>arXiv number</th>
                <th>Copyright: authors</th>
                <th>CC-BY</th>
                <th>Funded by SCOAP3</th>
                <th>notes</th>
                </tr>""")
    return ''.join(return_val)
def get_list():
    papers = []
    prev_version = perform_request_search()

    for recid in prev_version:
        rec = get_record(recid)
        doi = None
        arxiv_id = None
        try:
            if ('2', 'DOI') in rec['024'][0][0]:
                for t in rec['024'][0][0]:
                    if 'a' in t:
                        doi = t[1]
                if not doi:
                    print "No DOI for record: %i" % (recid,)
            else:
                print "No DOI for record: %i" % (recid,)
        except:
            print "No DOI for record: %i" % (recid,)

        checksum, url, url_type = get_pdf(recid)

        if '037' in rec.keys():
            if ('9', 'arXiv') in rec.get('037')[0][0]:
                for t in rec.get('037')[0][0]:
                    if 'a' in t:
                        arxiv_id = t[1]

        papers.append((recid, arxiv_id, get_creation_date(recid),
                       checksum, url, url_type, doi))

    return papers
def national_authors_list(req, search_country):
    req.content_type = 'text/csv; charset=utf-8'
    req.headers_out['content-disposition'] = ('attachment; '
                                              'filename=national_authors_list.csv')
    ids = perform_request_search(p="country:'%s'" % (search_country,))
    req.write("#;RECID;Title;Creation date;Publisher;"
              "Total # of authors;Authors name(given country only);"
              "Authors country;Authors affiliations\n")
    for number, recid in enumerate(ids):
        title = record_get_field_value(get_record(recid), '245', code="a")
        del_date = get_creation_date(recid)
        publisher = record_get_field_value(get_record(recid), '980', code="b")
        rec = get_record(recid)
        authors = []
        author_count = 0
        for f in ['100', '700']:
            if f in rec:
                for auth in rec[f]:
                    author_count += 1
                    aff = ''
                    name = ''
                    country = ''
                    hit = 0
                    for subfield, value in auth[0]:
                        if subfield == 'a':
                            name = value
                        if subfield in ['v', 'u']:
                            if aff:
                                aff += ', ' + value
                            else:
                                aff = value
                        if subfield == 'w':
                            if country:
                                country += ', ' + value
                            else:
                                country = value
                            if search_country in value:
                                hit = 1
                    if hit:
                        authors.append({'name': name,
                                        'affiliation': aff.replace('\n', ''),
                                        'country': country})
        for i, author in enumerate(authors):
            if i == 0:
                req.write("%s;%s;%s;%s;%s;%s;%s;%s;%s\n"
                          % (number + 1, recid, title.replace('\n', ''),
                             del_date, publisher, author_count,
                             author['name'], author['country'],
                             author['affiliation']))
            else:
                req.write(";;;;;;%s;%s;%s\n"
                          % (author['name'], author['country'],
                             author['affiliation']))
def create_latest_additions_info(self, rg=CFG_WEBSEARCH_INSTANT_BROWSE,
                                 ln=CFG_SITE_LANG):
    """
    Create info about latest additions that will be used for
    create_instant_browse() later.
    """
    self.latest_additions_info = []
    if self.nbrecs and self.reclist:
        # firstly, get last 'rg' records:
        recIDs = list(self.reclist)
        of = 'hb'

        # CERN hack begins: tweak latest additions for selected collections:
        if CFG_CERN_SITE:
            # alter recIDs list for some CERN collections:
            this_year = time.strftime("%Y", time.localtime())
            if self.name in ['CERN Yellow Reports', 'Videos']:
                last_year = str(int(this_year) - 1)
                # detect recIDs only from this and past year:
                recIDs = list(self.reclist &
                              search_pattern_parenthesised(
                                  p='year:%s or year:%s' %
                                  (this_year, last_year)))
            elif self.name in ['VideosXXX']:
                # detect recIDs only from this year:
                recIDs = list(self.reclist &
                              search_pattern_parenthesised(
                                  p='year:%s' % this_year))
            elif self.name == 'CMS Physics Analysis Summaries' and \
                 1281585 in self.reclist:
                # REALLY, REALLY temporary hack
                recIDs = list(self.reclist)
                recIDs.remove(1281585)
            # apply special filters:
            if self.name in ['Videos']:
                # select only videos with movies:
                recIDs = list(intbitset(recIDs) &
                              search_pattern_parenthesised(
                                  p='collection:"PUBLVIDEOMOVIE"'))
                of = 'hvp'
            # sort some CERN collections specially:
            if self.name in ['Videos', 'Video Clips', 'Video Movies',
                             'Video News', 'Video Rushes', 'Webcast',
                             'ATLAS Videos', 'Restricted Video Movies',
                             'Restricted Video Rushes',
                             'LHC First Beam Videos', 'CERN openlab Videos']:
                recIDs = sort_records(None, recIDs, '269__c')
            # CERN hack ends.

        total = len(recIDs)
        to_display = min(rg, total)

        for idx in range(total - 1, total - to_display - 1, -1):
            recid = recIDs[idx]
            self.latest_additions_info.append(
                {'id': recid,
                 'format': format_record(recid, of, ln=ln),
                 'date': get_creation_date(recid, fmt="%Y-%m-%d<br />%H:%i")})
    return
def format(bfo, format='%Y-%m-%d'):
    '''
    Get the record creation date.

    @param format: The date format in MySQL syntax
    '''
    recID = bfo.recID
    out = get_creation_date(recID, format)
    return out
def format_element(bfo, format='%Y-%m-%d'):
    '''
    Get the record creation date.

    @param format: The date format in MySQL syntax
    '''
    recID = bfo.recID
    out = get_creation_date(recID, format)
    return out
def format_element(bfo, format="%Y-%m-%d"): """ Get the record creation date. @param format: The date format in MySQL syntax """ recID = bfo.recID out = get_creation_date(recID, format) return out
def create_latest_additions_info(self, rg=CFG_WEBSEARCH_INSTANT_BROWSE,
                                 ln=CFG_SITE_LANG):
    """
    Create info about latest additions that will be used for
    create_instant_browse() later.
    """
    self.latest_additions_info = []
    if self.nbrecs and self.reclist:
        # firstly, get last 'rg' records:
        recIDs = list(self.reclist)

        # CERN hack begins: tweak latest additions for selected collections:
        if CFG_CERN_SITE:
            # alter recIDs list for some CERN collections:
            this_year = time.strftime("%Y", time.localtime())
            if self.name in ['CERN Yellow Reports', 'Videos']:
                last_year = str(int(this_year) - 1)
                # detect recIDs only from this and past year:
                recIDs = list(self.reclist &
                              search_pattern(p='year:%s or year:%s' %
                                             (this_year, last_year)))
            elif self.name in ['VideosXXX']:
                # detect recIDs only from this year:
                recIDs = list(self.reclist &
                              search_pattern(p='year:%s' % this_year))
            elif self.name == 'CMS Physics Analysis Summaries' and \
                 1281585 in self.reclist:
                # REALLY, REALLY temporary hack
                recIDs = list(self.reclist)
                recIDs.remove(1281585)
            # apply special filters:
            if self.name in ['Videos']:
                # select only videos with movies:
                recIDs = list(intbitset(recIDs) &
                              search_pattern(p='collection:"PUBLVIDEOMOVIE"'))
            # sort some CERN collections specially:
            if self.name in ['Videos', 'Video Clips', 'Video Movies',
                             'Video News', 'Video Rushes', 'Webcast',
                             'ATLAS Videos', 'Restricted Video Movies',
                             'Restricted Video Rushes',
                             'LHC First Beam Videos', 'CERN openlab Videos']:
                recIDs = sort_records(None, recIDs, '269__c')
            # CERN hack ends.

        total = len(recIDs)
        to_display = min(rg, total)

        for idx in range(total - 1, total - to_display - 1, -1):
            recid = recIDs[idx]
            self.latest_additions_info.append(
                {'id': recid,
                 'format': format_record(recid, "hb", ln=ln),
                 'date': get_creation_date(recid, fmt="%Y-%m-%d<br />%H:%i")})
    return
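# Each entry appended to self.latest_additions_info above has this shape
# (illustrative values):
#
#     {'id': 1281586,
#      'format': '<HTML brief ("hb") rendering of the record>',
#      'date': '2013-05-01<br />12:00'}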
def get_general_delivery(recid, doi):
    # bind the DOI as a query parameter instead of interpolating it into the
    # SQL string (avoids quoting problems and SQL injection)
    delivery_data = run_sql("SELECT * FROM doi WHERE doi=%s", (doi,))
    if delivery_data:
        first_del = get_creation_date(recid)
        first_ab_del = None
        last_mod = get_modification_date(recid)
        doi_reg = delivery_data[0][1]
        pdfa_del = None
        return (first_del, first_ab_del, last_mod, doi_reg, pdfa_del)
    else:
        return None
def national_authors_list(req, search_country):
    req.content_type = 'text/csv; charset=utf-8'
    req.headers_out['content-disposition'] = ('attachment; '
                                              'filename=national_authors_list.csv')
    ids = perform_request_search(p="country:'%s'" % (search_country,))
    # the data rows below are pipe-separated and carry 11 columns (including
    # DOI and Journal, fetched per record), so the header follows that layout
    req.write("#|RECID|DOI|Journal|Title|Creation date|Publisher|"
              "Total # of authors|Authors name(given country only)|"
              "Authors country|Authors affiliations\n")
    for number, recid in enumerate(ids):
        doi = record_get_field_value(get_record(recid), '024', ind1="7",
                                     code="a")
        journal = record_get_field_value(get_record(recid), '773', code="p")
        title = record_get_field_value(get_record(recid), '245', code="a")
        del_date = get_creation_date(recid)
        publisher = record_get_field_value(get_record(recid), '980', code="b")
        if not publisher:
            publisher = record_get_field_value(get_record(recid), '541',
                                               code="a")
        rec = get_record(recid)
        authors = []
        author_count = 0
        for f in ['100', '700']:
            if f in rec:
                for auth in rec[f]:
                    author_count += 1
                    aff = ''
                    name = ''
                    country = ''
                    hit = 0
                    for subfield, value in auth[0]:
                        if subfield == 'a':
                            name = value
                        if subfield in ['v', 'u']:
                            if aff:
                                aff += ', ' + value
                            else:
                                aff = value
                        if subfield == 'w':
                            if country:
                                country += ', ' + value
                            else:
                                country = value
                            if search_country in value:
                                hit = 1
                    if hit:
                        authors.append({'name': name,
                                        'affiliation': aff.replace('\n', ''),
                                        'country': country})
        for i, author in enumerate(authors):
            if i == 0:
                # doi and journal complete the 11 columns of the row
                req.write("%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s\n"
                          % (number + 1, recid, doi, journal,
                             title.replace('\n', ''), del_date, publisher,
                             author_count, author['name'], author['country'],
                             author['affiliation']))
            else:
                req.write("||||||||%s|%s|%s\n"
                          % (author['name'], author['country'],
                             author['affiliation']))
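# Expected shape of the output above (illustrative values; one full row per
# record, continuation rows for its further matching authors):
#
#     1|123456|10.1000/xyz123|Phys. Lett. B|Some title|2013-05-01|Elsevier|3|A. Author|Spain|Univ. A
#     ||||||||B. Author|Spain|Univ. B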
def generate_mediaexport_album(recid, resource_id, json_format=True):
    """Return the media-export entry for an album and its associated images.

    :param str recid: The record id.
    :param str resource_id: The report number.
    :param str json_format: If true, returns JSON dump, otherwise a dictionary.
    """
    # Fields that are required
    MEDIA_CONFIG = {
        'title_en': ('245', ' ', ' ', 'a'),
        'title_fr': ('246', ' ', '1', 'a'),
    }
    bibarchive = BibRecDocs(recid)
    bibarchive_with_deleted = BibRecDocs(recid, deleted_too=True)
    bibdocs = bibarchive.list_bibdocs()
    doc_numbers = [(bibdoc.get_id(), bibdoc.get_docname(), bibdoc)
                   for bibdoc in bibarchive_with_deleted.list_bibdocs()]
    doc_numbers.sort()

    # Calculate the size
    bibdoc_size = len(bibdocs)

    # Get the record
    record = get_record(recid)

    # Build the response
    entry = {}
    for key in MEDIA_CONFIG:
        entry[key] = record_get_field_value(record, *MEDIA_CONFIG[key])
    entry['id'] = resource_id
    entry['record_id'] = str(recid)
    entry['entry_date'] = get_creation_date(recid)
    entry['total'] = bibdoc_size
    entry['type'] = 'album'
    entry['images'] = []

    # For each doc create the corresponding report number
    for (docid, docname, bibdoc) in doc_numbers:
        if not bibdoc.deleted_p():
            bibdoc_number = doc_numbers.index((bibdoc.get_id(),
                                               bibdoc.get_docname(),
                                               bibdoc)) + 1
            image = generate_mediaexport(recid, True, resource_id,
                                         bibdoc_number, False)
            image['tirage_id'] = bibdoc_number
            image['id'] = '{0}-{1}'.format(image['id'], bibdoc_number)
            entry['images'].append(image)

    final = {}
    final['entries'] = [{'entry': entry}]

    if not CFG_JSON_AVAILABLE:
        return ''

    if json_format:
        return json.dumps(final)
    else:
        return final
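# Usage sketch (the record id and report number are made-up examples):
#
#     album = generate_mediaexport_album(123456, 'CERN-PHOTO-2013-001',
#                                        json_format=False)
#     for image in album['entries'][0]['entry']['images']:
#         print image['id'], image.get('caption_en', '')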
def format_element(bfo, format='%Y-%m-%d', date_format='%Y-%m-%d'):
    '''
    Get the record creation date.
    <b>Note:</b> parameter <code>format</code> is deprecated

    @param date_format: The date format in MySQL syntax
    '''
    recID = bfo.recID
    # Let's be gentle and backward compatible while "format" is here:
    if date_format == '%Y-%m-%d' and format != '%Y-%m-%d':
        date_format = format

    out = get_creation_date(recID, date_format)
    return out
def format_element(bfo, format='%Y-%m-%d', date_format='%Y-%m-%d'):
    '''
    Get the record creation and last modification dates.
    <b>Note:</b> parameter <code>format</code> is deprecated

    @param date_format: The date format in MySQL syntax
    '''
    _ = gettext_set_language(bfo.lang)
    recID = bfo.recID
    return _("Record created on %(creation_date)s, "
             "modified on %(modification_date)s") % \
        {'creation_date': get_creation_date(recID, "%Y-%m-%d"),
         'modification_date': get_modification_date(recID, "%Y-%m-%d")}
def bst_get_new_ilo_publications(number_results_to_display=5):
    """
    Bibtasklet responsible for generating the list of the most recent ILO
    publications and for automatically updating the query used to fetch them.

    @param number_results_to_display: number of results to display to users
                                      in the main page.
    """
    task_update_progress(
        "Start updating query for collection Latest publications by ILO")
    # get current month and the 2 previous ones
    now = datetime.datetime.now()
    month_1 = (now + dateutil.relativedelta.relativedelta(months=-1)).month
    month_2 = (now + dateutil.relativedelta.relativedelta(months=-2)).month
    # update also tab collection where id=113
    # NOTE: the year is hardcoded to 2013 in the patterns below
    dbquery = """(946__d:2013-%s-* or 946__d:2013-%s-* or 946__d:2013-%s-*) \
and (997__a:2012 or 997__a:2013) and (992__a:"ILO publication") \
not callno:GB.* not callno:NYP""" % (now.month, month_1, month_2)
    query = """update collection set dbquery='%s' where id=113;""" % dbquery
    run_sql(query)
    task_update_progress(
        "Finished updating query for collection Latest publications by ILO")

    task_update_progress("Start calculating new ILO publications")
    ILO_publications_recids = perform_request_search(p='%s' % dbquery)
    dict_creation_date_per_recid = {}
    for recid in ILO_publications_recids:
        dict_creation_date_per_recid.update(
            {recid: get_creation_date(recid, fmt="%Y-%m-%d %H:%i:%S")})
    sorted_dict_creation_date_per_recid = sorted(
        dict_creation_date_per_recid.items(), key=lambda x: x[1])
    new_ilo_publications = sorted_dict_creation_date_per_recid[
        -int(number_results_to_display):]
    new_ilo_publications.reverse()
    new_ilo_publications_recids = [t[0] for t in new_ilo_publications]

    new_ilo_publications_file = open(CFG_TMPDIR + "/new_ILO_publications", "w")
    new_ilo_publications_file.write(repr(new_ilo_publications_recids))
    new_ilo_publications_file.close()
    task_update_progress("Finished calculating new ILO publications")
    return dbquery
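# The tasklet above hardcodes 2013 in its 946__d patterns, so the month
# window does not follow the current date across year boundaries. A
# year-aware variant of the window computation could look like this (a
# sketch, not the original behaviour; it also zero-pads the month):

import datetime

import dateutil.relativedelta


def last_three_month_patterns(now=None):
    """Return 946__d date patterns for the current and two previous months."""
    now = now or datetime.datetime.now()
    months = [now + dateutil.relativedelta.relativedelta(months=-i)
              for i in range(3)]
    return ['946__d:%d-%02d-*' % (m.year, m.month) for m in months]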
def get_delivery_data(recid, doi):
    first_del = None
    first_ab_del = None
    last_mod = None
    doi_reg = None
    pdfa_del = None
    pub_date = None

    delivery_data = get_detiled_package_delivery(doi)
    if delivery_data:
        tmp = delivery_data[0]
        if not tmp:
            tmp = get_creation_date(recid)
        return (tmp, delivery_data[1], delivery_data[2],
                delivery_data[3], delivery_data[4], delivery_data[5])
    else:
        delivery_data = get_general_delivery(recid, doi)
        if delivery_data:
            return (delivery_data[0], delivery_data[1], delivery_data[2],
                    delivery_data[3], delivery_data[4], delivery_data[5])

    return first_del, first_ab_del, last_mod, doi_reg, pdfa_del, pub_date
def write_csv(req, dictionary, journal_list, f_date, t_date,
              created_or_modified_date):
    return_val = ''

    for key in journal_list:
        val = dictionary[key]
        papers = perform_request_search(p="date%s:%s->%s" %
                                        (created_or_modified_date,
                                         f_date, t_date),
                                        c=val)
        if papers == []:
            continue

        # terminate the journal-name line so the header starts on its own row
        return_val += key + '\n'
        return_val += ','.join(['recid', 'cr. date', 'mod. date', 'DOI',
                                'XML', 'PDF', 'PDF/A', 'Complete record?',
                                'arXiv number', 'Copyright: authors', 'CC-BY',
                                'Funded by SCOAP3', 'notes']) + '\n'

        for recid in papers:
            rec = get_record(recid)
            doi = get_doi(rec)
            record_compl = is_complete_record(recid)
            return_val += ','.join(str(item) for item in [
                str(recid),
                get_creation_date(recid),
                get_modification_date(recid),
                doi,
                has_or_had_format(recid, '.xml').lstrip('<b>').rstrip('</b>'),
                has_or_had_format(recid, '.pdf').lstrip('<b>').rstrip('</b>'),
                has_or_had_format(recid, '.pdf;pdfa').lstrip('<b>').rstrip('</b>'),
                str(check_complete_rec(record_compl)),
                get_arxiv(rec).lstrip('<b>').rstrip('</b>'),
                is_compliant(recid, 'authors').lstrip('<b>').rstrip('</b>'),
                is_compliant(recid, 'cc').lstrip('<b>').rstrip('</b>'),
                is_compliant(recid, 'scoap3').lstrip('<b>').rstrip('</b>'),
                str([rec_key for rec_key, rec_val
                     in record_compl.iteritems() if not rec_val])])
            return_val += '\n'

    return return_val
def index(req):
    req.content_type = 'text/csv; charset=utf-8'
    req.headers_out['content-disposition'] = ('attachment; '
                                              'filename=scoap3_records_info.csv')
    req.write("SCOAP3 record id; Journal; Creation date; Modification date; "
              "Title; Authors; Publication info\n")
    for key, value in JOURNALS.iteritems():
        recids = perform_request_search(c=value)
        for recid in recids:
            rec = get_record(recid)
            if '245' in rec:
                title = rec['245'][0][0][0][1].strip()
            else:
                title = ""
            creation_date = get_creation_date(recid)
            modification_date = get_modification_date(recid)
            if '100' in rec:
                authors = rec['100'][0][0][0][1]
            else:
                authors = ""
            if '700' in rec:
                for author in rec['700']:
                    authors += ' / %s' % (author[0][0][1],)
            publication_info = ''
            if '733' in rec:
                publication_info += "%s %s (%s) %s" % (rec['733'][0][0][0][1],
                                                       rec['733'][0][0][1][1],
                                                       rec['733'][0][0][2][1],
                                                       rec['733'][0][0][3][1])
            if '024' in rec:
                publication_info += " %s" % (rec['024'][0][0][0][1],)
            if '037' in rec:
                publication_info += " %s" % (rec['037'][0][0][0][1],)
            req.write("%s; %s; %s; %s; %s; %s; %s\n"
                      % (recid, value, creation_date, modification_date,
                         title, authors, publication_info))
def check_records(records):
    for record in records:
        # adds missing data in year field
        year = record_get_field_value(record, '773', code='y')
        if not year:
            for position, value in record.iterfield('773__y'):
                record.amend_field(
                    position,
                    get_creation_date(record_get_field_value(record, '001'),
                                      '%Y'))
        # remove empty subfields
        if '773' in record:
            for subfield, value in record['773'][0][0]:
                if not value or value == '-':
                    for position, val in record.iterfield('773__%s'
                                                          % (subfield,)):
                        record.delete_field(
                            position,
                            'Deleting empty field: %s' % (subfield,))
        else:
            write_message("Missing 773 field in record %s"
                          % record_get_field_value(record, '001'))
def generate_mediaexport(recid, is_image, resource_id, tirage, wrapped,
                         json_format=True):
    """Generates the JSON with the info needed to export a media resource
    to CERN-Drupal.

    Mandatory fields to export: title_en, title_fr, caption_en, caption_fr,
    copyright_holder, copyright_date, attribution (image), keywords (image),
    directors (video), producer (video)
    """
    MEDIA_CONFIG = {'title_en': ('245', ' ', ' ', 'a'),
                    'title_fr': ('246', ' ', '1', 'a'),
                    'keywords': ('653', '1', ' ', 'a'),
                    'copyright_holder': ('542', ' ', ' ', 'd'),
                    'copyright_date': ('542', ' ', ' ', 'g'),
                    'license_url': ('540', ' ', ' ', 'a'),
                    'license_desc': ('540', ' ', ' ', 'b'),
                    'license_body': ('540', ' ', ' ', 'u'),
                    'author': ('100', ' ', ' ', 'a'),
                    'affiliation': ('100', ' ', ' ', 'u'),
                    'directors': ('700', ' ', ' ', 'a'),
                    'video_length': ('300', ' ', ' ', 'a'),
                    'language': ('041', ' ', ' ', 'a'),
                    'creation_date': ('269', ' ', ' ', 'c'),
                    'abstract_en': ('520', ' ', ' ', 'a'),
                    'abstract_fr': ('590', ' ', ' ', 'a')}

    entry = {}
    record = get_record(recid)
    for key in MEDIA_CONFIG:
        entry[key] = record_get_field_value(record, *MEDIA_CONFIG[key])  # .encode('utf-8')
    entry['id'] = resource_id
    entry['record_id'] = str(recid)
    entry['type'] = is_image and "image" or "video"
    entry['entry_date'] = get_creation_date(recid)

    toc_recid = 0
    toc_record = {}
    if not is_image and 'asset' in record_get_field_value(record, *('970', ' ', ' ', 'a')):
        toc_repnum = record_get_field_value(record, *('773', ' ', ' ', 'r'))
        if toc_repnum:
            try:
                toc_recid = search_pattern(p='reportnumber:"%s"' % toc_repnum)[0]
            except IndexError:
                pass

    # corner cases for copyright & licence
    if not entry['copyright_holder']:
        entry['copyright_holder'] = 'CERN'
    if not entry['license_body']:
        entry['license_body'] = 'CERN'
    if not entry['license_desc']:
        entry['license_desc'] = 'CERN'
    if not entry['license_url']:
        from invenio.bibknowledge import get_kb_mapping
        try:
            entry['license_url'] = get_kb_mapping(kb_name='LICENSE2URL',
                                                  key=entry['license_desc'])['value']
        except KeyError:
            pass

    # keywords
    entry['keywords'] = ','.join(record_get_field_values(record,
                                                         *MEDIA_CONFIG['keywords']))

    # attribution
    if not entry.get('author', '') and not entry.get('attribution', '') and toc_recid > 0:
        if not toc_record:
            toc_record = get_record(toc_recid)
        entry['author'] = record_get_field_value(toc_record,
                                                 *MEDIA_CONFIG['author'])
        entry['affiliation'] = record_get_field_value(toc_record,
                                                      *MEDIA_CONFIG['affiliation'])
        if not entry.get('directors', ''):
            entry['directors'] = ','.join(record_get_field_values(toc_record,
                                                                  *MEDIA_CONFIG['directors']))

    # photos
    if is_image:
        if entry['author']:
            entry['attribution'] = entry['author']
            if entry['affiliation']:
                entry['attribution'] += ': %s' % entry['affiliation']
        del entry['directors']
    else:
        # videos
        if entry['author']:
            entry['producer'] = entry['author']
        # Get all files from record
        files_field = ('856', '7', ' ', 'u')
        # Filter all that are images
        thumbnails = [image for image
                      in record_get_field_values(record, *files_field)
                      if 'jpg' in image]
        # If exists get the first one
        if thumbnails:
            entry['thumbnail'] = thumbnails[0]
    del entry['author']
    del entry['affiliation']

    # title
    if not entry['title_en'] and not entry['title_fr'] and toc_recid > 0:
        if not toc_record:
            toc_record = get_record(toc_recid)
        entry['title_en'] = record_get_field_value(toc_record,
                                                   *MEDIA_CONFIG['title_en'])
        entry['title_fr'] = record_get_field_value(toc_record,
                                                   *MEDIA_CONFIG['title_fr'])

    # crop, media storage, caption
    if is_image:
        entry['file_params'] = {'size': ['small', 'medium', 'large'],
                                'crop': False}
        if 'MediaArchive' in record_get_field_values(record, *('856', '7', ' ', '2')):
            entry['caption_en'] = get_photolab_image_caption(record, tirage)
            entry['caption_fr'] = ''
        else:
            brd = BibRecDocs(recid, deleted_too=True)
            doc_numbers = [(bibdoc.get_id(), bibdoc)
                           for bibdoc in brd.list_bibdocs()]
            doc_numbers.sort()
            bibdoc = doc_numbers[tirage - 1][1]
            entry['filename'] = brd.get_docname(bibdoc.get_id())  # bibdoc.get_docname()
            if 'crop' in [bibdocfile.get_subformat()
                          for bibdocfile in bibdoc.list_latest_files()]:
                entry['file_params']['crop'] = True
            if not bibdoc.deleted_p():
                for bibdoc_file in bibdoc.list_latest_files():
                    entry['caption_en'] = bibdoc_file.get_comment()
                    entry['caption_fr'] = bibdoc_file.get_description()
                    if entry.get('caption_en', ''):
                        break
    if not entry.get('caption_en', ''):
        entry['caption_en'] = entry['abstract_en']
    if not entry.get('caption_fr', ''):
        entry['caption_fr'] = entry['abstract_fr']

    if is_image:
        del entry['language']
        del entry['video_length']
    # we don't need it
    del entry['abstract_en']
    del entry['abstract_fr']

    # make sure all mandatory fields are sent
    MANDATORY_FIELDS = ['title_en', 'title_fr', 'caption_en', 'caption_fr',
                        'copyright_holder', 'copyright_date']
    MANDATORY_FIELDS_IMAGE = MANDATORY_FIELDS + ['attribution', 'keywords']
    MANDATORY_FIELDS_VIDEO = MANDATORY_FIELDS + ['directors', 'producer',
                                                 'thumbnail']
    if is_image:
        mandatory_fields_all = MANDATORY_FIELDS_IMAGE
    else:
        mandatory_fields_all = MANDATORY_FIELDS_VIDEO
    for field in mandatory_fields_all:
        entry.setdefault(field, '')

    # In case we want to embed the object
    if wrapped:
        final = {}
        final['entries'] = [{'entry': entry}]
        if not CFG_JSON_AVAILABLE:
            return ''
        if json_format:
            return json.dumps(final)
        else:
            return final
    else:
        return entry
def csu(req):
    req.content_type = 'text/csv; charset=utf-8'
    req.headers_out['content-disposition'] = ('attachment; '
                                              'filename=csu_records_info.csv')
    search_patterns = ["California Polytechnic State University",
                       "Carson",
                       "Dominguez Hills",
                       "Fresno",
                       "California State University Fullerton",
                       "California State University Long Beach",
                       "California State University, Los Angeles",
                       "Northridge",
                       "California State University, Sacramento",
                       "San Diego State University",
                       "sfsu"]

    def special_aff(author):
        affs = []
        au = ""
        name = ""
        for i in author:
            if i[0] == 'v' and value in i[1]:
                affs.append(i[1])
            if i[0] == 'a':
                name = i[1]
        if len(affs) > 0:
            au = name + '('
            for aff in affs:
                au += aff + ', '
            au += '), '
        return au

    req.write("SCOAP3 record id; Journal; Creation date; Modification date; "
              "Title; Authors; Publication info\n")
    for value in search_patterns:
        recids = perform_request_search(p="affiliation:'%s'" % (value,))
        # req.write("%s; %s\n" % (value, len(recids)))
        for recid in recids:
            rec = get_record(recid)
            if '245' in rec:
                title = rec['245'][0][0][0][1].strip()
            else:
                title = ""
            creation_date = get_creation_date(recid)
            modification_date = get_modification_date(recid)
            authors = ""
            if '100' in rec:
                authors += special_aff(rec['100'][0][0])
            if '700' in rec:
                for author in rec['700']:
                    authors += special_aff(author[0])
            publication_info = ''
            if '773' in rec:
                for p in rec['773'][0][0]:
                    if p[0] == 'p':
                        publication_info = p[1]
                publication_info += " %s" % (rec['024'][0][0][0][1],)
            if '037' in rec:
                publication_info += " %s" % (rec['037'][0][0][0][1],)
            req.write("%s; %s; %s; %s; %s; %s; %s\n"
                      % (recid, value, creation_date, modification_date,
                         title, authors, publication_info))
def write_csv(req, dictionary, journal_list, f_date, t_date,
              created_or_modified_date):
    return_val = ''

    for key in journal_list:
        val = dictionary[key]
        papers = perform_request_search(p="date%s:%s->%s" %
                                        (created_or_modified_date,
                                         f_date, t_date),
                                        c=val)
        if papers == []:
            continue

        # terminate the journal-name line so the header starts on its own row
        return_val += key + '\n'
        return_val += ';'.join(['recid', 'cr. date', 'mod. date', 'DOI',
                                'XML', 'PDF', 'PDF/A', 'Complete record?',
                                'arXiv number', 'Copyright: authors', 'CC-BY',
                                'Funded by SCOAP3', 'arXiv category', 'notes',
                                'First delivery', 'First AB delivery',
                                'Last modification', 'PDF/A upload',
                                'DOI registration', 'Delivery diff',
                                'PDF/A diff']) + '\n'

        for recid in papers:
            rec = get_record(recid)
            doi = get_doi(rec)
            first_del = None
            first_ab_del = None
            last_mod = None
            doi_reg = None
            pdfa_del = None
            delivery_data = run_sql("""SELECT doi.creation_date AS 'doi_reg',
                                              package.name AS 'pkg_name',
                                              package.delivery_date AS 'pkg_delivery'
                                       FROM doi_package
                                       LEFT JOIN doi ON doi_package.doi=doi.doi
                                       LEFT JOIN package ON package.id=doi_package.package_id
                                       WHERE doi_package.doi=%s
                                       ORDER BY package.delivery_date ASC""",
                                    (doi,), with_dict=True)
            if delivery_data:
                first_del = delivery_data[0]['pkg_delivery']
                first_ab_del = get_delivery_of_firts_ab_package(delivery_data)
                last_mod = delivery_data[-1]['pkg_delivery']
                doi_reg = delivery_data[0]['doi_reg']
                pdfa_del = get_delivery_of_firts_pdfa(delivery_data)

            record_compl = is_complete_record(recid)
            return_val += ';'.join(str(item) for item in [
                str(recid),
                get_creation_date(recid),
                get_modification_date(recid),
                doi,
                has_or_had_format(recid, '.xml').lstrip('<b>').rstrip('</b>'),
                has_or_had_format(recid, '.pdf').lstrip('<b>').rstrip('</b>'),
                has_or_had_format(recid, '.pdf;pdfa').lstrip('<b>').rstrip('</b>'),
                str(check_complete_rec(record_compl)),
                get_arxiv(rec).lstrip('<b>').rstrip('</b>'),
                is_compliant(recid, 'authors').lstrip('<b>').rstrip('</b>'),
                is_compliant(recid, 'cc').lstrip('<b>').rstrip('</b>'),
                is_compliant(recid, 'scoap3').lstrip('<b>').rstrip('</b>'),
                is_compliant(recid, 'category').lstrip('<b>').rstrip('</b>'),
                str([rec_key for rec_key, rec_val
                     in record_compl.iteritems() if not rec_val]),
                str(first_del),
                str(first_ab_del),
                str(last_mod),
                str(pdfa_del),
                str(doi_reg),
                check_24h_delivery(first_ab_del, doi_reg),
                check_24h_delivery(pdfa_del, doi_reg)])
            return_val += '\n'

    return return_val
def format_element(bfo):
    """
    Displays the description of how users should cite
    any content of the archive. The citation includes:

    For blogs:
    "title". (record_creation_date). record_url
    Retrieved from the original "original_url"

    For blog posts:
    author. "title". Blog: "blog_title".
    (record_creation_date). record_url
    Retrieved from the original "original_url"

    For comments:
    author. Blog post: "post_title".
    (record_creation_date). record_url
    Retrieved from the original "original_url"
    """
    coll = bfo.fields('980__a')[0]
    recid = bfo.control_field('001')

    # let's get the fields we want to show
    if coll in ["BLOGPOST", "COMMENT"]:
        author = bfo.fields('100__a')[0]
        try:
            original_creation_date = bfo.fields('269__c')[0]
        except:
            original_creation_date = ""
    try:
        title = bfo.fields('245__a')[0]
    except:
        title = "Untitled"
    try:
        original_url = bfo.fields('520__u')[0]
    except:
        raise Exception("URL not found")

    # creation date of a record
    record_creation_date = get_creation_date(recid)
    # url in the archive
    record_url = CFG_SITE_URL + "/record/" + recid

    if coll == "BLOGPOST":
        # we will also show the blog's title of
        # the corresponding blog post
        blog_recid = get_parent_blog(recid)
        blog_bfo = BibFormatObject(blog_recid)
        try:
            blog_title = blog_bfo.fields('245__a')[0]
        except:
            blog_title = 'Untitled'
        description = """<table style="border:1px solid black;"><tr><td>\
<span><b>%s</b>. '%s'. Blog: '%s'. <br /> \
(%s). <i>'%s'</i> <br /> \
Retrieved from the original <i>'%s'</i></span></td></tr></table>""" \
            % (author, title, blog_title, record_creation_date,
               record_url, original_url)
    elif coll == "COMMENT":
        # we will also show the post's title of
        # the corresponding comment
        post_recid = get_parent_post(recid)
        post_bfo = BibFormatObject(post_recid)
        try:
            post_title = post_bfo.fields('245__a')[0]
        except:
            post_title = 'Untitled'
        description = """<table style="border:1px solid black;"><tr><td>\
<span><b>%s. </b>Blog post: '%s'.<br /> \
(%s). <i>'%s'</i> <br /> \
Retrieved from the original <i>'%s'</i></span></td></tr></table>""" \
            % (author, post_title, record_creation_date,
               record_url, original_url)
    else:  # coll == "BLOG"
        description = """<table style="border:1px solid black;"><tr><td>\
<span>'%s' <br /> \
(%s). <i>'%s'</i> <br /> \
Retrieved from the original <i>'%s'</i></span></td></tr></table>""" \
            % (title, record_creation_date, record_url, original_url)

    out = """
    <script type="text/javascript">
    function displayCitationDescription(){
        var description = document.getElementById('description');
        var citation_link = document.getElementById('citation_link');
        if (description.style.display == 'none'){
            description.style.display = '';
            citation_link.innerHTML = "Hide citation description"
        } else {
            description.style.display = 'none';
            citation_link.innerHTML = "How to cite this"
        }
    }
    </script>
    """
    out += '<span id="description" style="">' + description + '</span>'
    out += '<a class="moreinfo" id="citation_link" \
href="javascript:void(0)" onclick="displayCitationDescription()"></a>'
    out += '<script type="text/javascript">displayCitationDescription()</script>'
    return out
def get_record_checks(req, recids):
    if recids == '':
        return ''
    recids = recids.split(',')
    return_val = []
    for rid in recids:
        try:
            recid = int(rid)
            rec = get_record(recid)
            doi = get_doi(rec)
            delivery_data = run_sql("""SELECT doi.creation_date AS 'doi_reg',
                                              package.name AS 'pkg_name',
                                              package.delivery_date AS 'pkg_delivery'
                                       FROM doi_package
                                       LEFT JOIN doi ON doi_package.doi=doi.doi
                                       LEFT JOIN package ON package.id=doi_package.package_id
                                       WHERE doi_package.doi=%s
                                       ORDER BY package.delivery_date ASC""",
                                    (doi,), with_dict=True)
            first_del = None
            first_ab_del = None
            last_mod = None
            doi_reg = None
            pdfa_del = None
            if delivery_data:
                first_del = delivery_data[0]['pkg_delivery']
                first_ab_del = get_delivery_of_firts_ab_package(delivery_data)
                last_mod = delivery_data[-1]['pkg_delivery']
                doi_reg = delivery_data[0]['doi_reg']
                pdfa_del = get_delivery_of_firts_pdfa(delivery_data)

            record_compl = is_complete_record(recid)
            return_val.append("""<tr>
                <td><a href="%s">%i</a></td>
                <td>%s</td>
                <td>%s</td>
                <td><a href="http://dx.doi.org/%s">%s</a></td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td %s>%s</td>
                <td %s>%s</td>
                </tr>""" % (join(CFG_SITE_URL, 'record', str(recid)),
                            recid,
                            get_creation_date(recid),
                            get_modification_date(recid),
                            doi, doi,
                            has_or_had_format(recid, '.xml'),
                            has_or_had_format(recid, '.pdf'),
                            has_or_had_format(recid, '.pdf;pdfa'),
                            check_complete_rec(record_compl),
                            get_arxiv(rec),
                            is_compliant(recid, "authors"),
                            is_compliant(recid, "cc"),
                            is_compliant(recid, "scoap3"),
                            is_compliant(recid, "category"),
                            str([rec_key for rec_key, rec_val
                                 in record_compl.iteritems() if not rec_val]),
                            str(first_del),
                            str(first_ab_del),
                            str(last_mod),
                            str(pdfa_del),
                            str(doi_reg),
                            format_24h_delivery(check_24h_delivery(first_del, doi_reg)),
                            check_24h_delivery(first_del, doi_reg),
                            format_24h_delivery(check_24h_delivery(pdfa_del, doi_reg)),
                            check_24h_delivery(pdfa_del, doi_reg)))
        except Exception:
            register_exception()
            recid = rid
            return_val.append("""<tr><th colspan="21" align="left">
                <h2>%s</h2></th></tr>""" % (recid,))
            return_val.append("""<tr>
                <th>recid</th>
                <th>cr. date</th>
                <th>mod. date</th>
                <th>DOI</th>
                <th>XML</th>
                <th>PDF</th>
                <th>PDF/A</th>
                <th>Complete record?</th>
                <th>arXiv number</th>
                <th>Copyright: authors</th>
                <th>CC-BY</th>
                <th>Funded by SCOAP3</th>
                <th>Category</th>
                <th>notes</th>
                <th>First delivery</th>
                <th>First AB delivery</th>
                <th>Last modification</th>
                <th>PDF/A upload</th>
                <th>DOI registration</th>
                <th>Delivery diff</th>
                <th>PDF/A diff</th>
                </tr>""")
    return ''.join(return_val)
def get_record_checks(req, recids):
    if recids == '':
        return ''
    recids = recids.split(',')
    return_val = []
    for rid in recids:
        try:
            recid = int(rid)
            rec = get_record(recid)
            doi = get_doi(rec)
            first_del, first_ab_del, last_mod, doi_reg, pdfa_del, pub_date = \
                get_delivery_data(recid, doi)
            record_compl = is_complete_record(recid)
            return_val.append("""<tr>
                <td><a href="%s">%i</a></td>
                <td>%s</td>
                <td>%s</td>
                <td><a href="http://dx.doi.org/%s">%s</a></td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td %s>%s</td>
                <td %s>%s</td>
                <td>%s</td>
                <td %s>%s</td>
                </tr>""" % (join(CFG_SITE_URL, 'record', str(recid)),
                            recid,
                            get_creation_date(recid),
                            get_modification_date(recid),
                            doi, doi,
                            has_or_had_format(recid, '.xml'),
                            has_or_had_format(recid, '.pdf'),
                            has_or_had_format(recid, '.pdf;pdfa'),
                            check_complete_rec(record_compl),
                            get_arxiv(rec),
                            is_compliant(recid, "authors"),
                            is_compliant(recid, "cc"),
                            is_compliant(recid, "scoap3"),
                            is_compliant(recid, "category"),
                            str([rec_key for rec_key, rec_val
                                 in record_compl.iteritems() if not rec_val]),
                            str(first_del),
                            str(first_ab_del),
                            str(last_mod),
                            str(pdfa_del),
                            str(doi_reg),
                            format_24h_delivery(check_24h_delivery(first_del, doi_reg)),
                            check_24h_delivery(first_del, doi_reg),
                            format_24h_delivery(check_24h_delivery(pdfa_del, doi_reg)),
                            check_24h_delivery(pdfa_del, doi_reg),
                            str(pub_date),
                            format_24h_delivery(check_24h_delivery(first_del, pub_date)),
                            check_24h_delivery(first_del, pub_date)))
        except Exception:
            register_exception()
            recid = rid
            return_val.append("""<tr><th colspan="23" align="left">
                <h2>%s</h2></th></tr>""" % (recid,))
            return_val.append("""<tr>
                <th>recid</th>
                <th>cr. date</th>
                <th>mod. date</th>
                <th>DOI</th>
                <th>XML</th>
                <th>PDF</th>
                <th>PDF/A</th>
                <th>Complete record?</th>
                <th>arXiv number</th>
                <th>Copyright: authors</th>
                <th>CC-BY</th>
                <th>Funded by SCOAP3</th>
                <th>arXiv category</th>
                <th>notes</th>
                <th>First delivery</th>
                <th>First AB delivery</th>
                <th>Last modification</th>
                <th>PDF/A upload</th>
                <th>DOI registration</th>
                <th>Delivery diff</th>
                <th>PDF/A diff</th>
                <th>Publication online</th>
                <th>Pub. online diff</th>
                </tr>""")
    return ''.join(return_val)
def setUp(self):
    """webalert - prepare test alerts"""
    from invenio import alert_engine
    from invenio.search_engine import get_creation_date
    # TODO: test alerts for baskets too
    self.added_query_ids = []
    q_query = """INSERT INTO query (type, urlargs) VALUES (%s,%s)"""
    q_user_query = """INSERT INTO user_query (id_user, id_query, date)
                      VALUES (%%s,%(id_query)s,NOW())"""
    q_user_query_basket = """INSERT INTO user_query_basket (id_user, id_query,
                             id_basket, frequency, date_creation, alert_name,
                             notification, alert_recipient)
                             VALUES (%%s,%(id_query)s,%%s,%%s,DATE(NOW()),%%s,%%s,%%s)"""
    parameters = {
        'romeo 1': {'query_params': ('r', 'c=Theses&c=Poetry',),
                    'user_query_params': (5,),
                    'user_query_basket_params': (5, 0, 'day',
                                                 'Romeo alert 1', 'y', '',)},
        'juliet 1': {'query_params': ('r', 'c=Theses&c=Poetry',),
                     'user_query_params': (6,),
                     'user_query_basket_params': (6, 0, 'day',
                                                  'Juliet alert 1', 'y', '',)},
        'mailing list 1': {'query_params': ('r', 'c=Theses&c=Poetry',),
                           'user_query_params': (6,),
                           'user_query_basket_params': (6, 0, 'day',
                                                        'Mailing list alert 1',
                                                        'y',
                                                        CFG_SITE_ADMIN_EMAIL,)},
        'juliet 2': {'query_params': ('r', 'c=Theses',),
                     'user_query_params': (6,),
                     'user_query_basket_params': (6, 0, 'day',
                                                  'Juliet alert 2', 'y', '',)},
    }
    for params in parameters.values():
        row_id = run_sql(q_query, params['query_params'])
        self.added_query_ids.append(row_id)
        run_sql(q_user_query % {'id_query': row_id},
                params['user_query_params'])
        run_sql(q_user_query_basket % {'id_query': row_id},
                params['user_query_basket_params'])

    # Run the alert for a date when we expect some result, and
    # record output for later analysis.
    # First get creation date of demo records:
    alert_date = datetime.datetime(
        *(time.strptime(get_creation_date(41, fmt="%Y-%m-%d"),
                        '%Y-%m-%d')[:6])).date() + datetime.timedelta(days=1)

    # Prevent emails to be sent, raise verbosity:
    previous_cfg_webalert_debug_level = alert_engine.CFG_WEBALERT_DEBUG_LEVEL
    alert_engine.CFG_WEBALERT_DEBUG_LEVEL = 3

    # Re-route standard output:
    previous_stdout = sys.stdout
    sys.stdout = alert_output = StringIO()

    # Run the alert
    alert_engine.run_alerts(date=alert_date)

    # Restore standard output and alert debug level
    sys.stdout = previous_stdout
    alert_engine.CFG_WEBALERT_DEBUG_LEVEL = previous_cfg_webalert_debug_level

    # Remove test alerts
    for query_id in self.added_query_ids:
        run_sql('DELETE FROM user_query_basket WHERE id_query=%s',
                (query_id,))
        run_sql('DELETE FROM user_query WHERE id_query=%s', (query_id,))
        run_sql('DELETE FROM query WHERE id=%s', (query_id,))

    # Identify alerts, organize by name (hopefully unique for
    # these tests)
    self.alerts = {}
    for alert_message in alert_output.getvalue().split("+" * 80 + '\n'):
        if 'alert name: ' in alert_message:
            alert_name = alert_message.split('alert name: ')[1].split('\n')[0]
            self.alerts[alert_name] = alert_message
def setUp(self):
    """webalert - prepare test alerts"""
    # TODO: test alerts for baskets too
    self.added_query_ids = []
    q_query = """INSERT INTO query (type, urlargs) VALUES (%s,%s)"""
    q_user_query = """INSERT INTO user_query (id_user, id_query, date)
                      VALUES (%%s,%(id_query)s,NOW())"""
    q_user_query_basket = """INSERT INTO user_query_basket (id_user, id_query,
                             id_basket, frequency, date_creation, alert_name,
                             notification, alert_recipient)
                             VALUES (%%s,%(id_query)s,%%s,%%s,DATE(NOW()),%%s,%%s,%%s)"""
    parameters = {
        'romeo 1': {'query_params': ('r', 'c=Theses&c=Poetry',),
                    'user_query_params': (5,),
                    'user_query_basket_params': (5, 0, 'day',
                                                 'Romeo alert 1', 'y', '',)},
        'juliet 1': {'query_params': ('r', 'c=Theses&c=Poetry',),
                     'user_query_params': (6,),
                     'user_query_basket_params': (6, 0, 'day',
                                                  'Juliet alert 1', 'y', '',)},
        'mailing list 1': {'query_params': ('r', 'c=Theses&c=Poetry',),
                           'user_query_params': (6,),
                           'user_query_basket_params': (6, 0, 'day',
                                                        'Mailing list alert 1',
                                                        'y',
                                                        CFG_SITE_ADMIN_EMAIL,)},
        'juliet 2': {'query_params': ('r', 'c=Theses',),
                     'user_query_params': (6,),
                     'user_query_basket_params': (6, 0, 'day',
                                                  'Juliet alert 2', 'y', '',)},
    }
    for params in parameters.values():
        row_id = run_sql(q_query, params['query_params'])
        self.added_query_ids.append(row_id)
        run_sql(q_user_query % {'id_query': row_id},
                params['user_query_params'])
        run_sql(q_user_query_basket % {'id_query': row_id},
                params['user_query_basket_params'])

    # Run the alert for a date when we expect some result, and
    # record output for later analysis.
    # First get creation date of demo records:
    alert_date = datetime.datetime(
        *(time.strptime(get_creation_date(41, fmt="%Y-%m-%d"),
                        '%Y-%m-%d')[:6])).date() + datetime.timedelta(days=1)

    # Prevent emails to be sent, raise verbosity:
    previous_cfg_webalert_debug_level = alert_engine.CFG_WEBALERT_DEBUG_LEVEL
    alert_engine.CFG_WEBALERT_DEBUG_LEVEL = 3

    # Re-route standard output:
    previous_stdout = sys.stdout
    sys.stdout = alert_output = StringIO()

    # Run the alert
    alert_engine.run_alerts(date=alert_date)

    # Restore standard output and alert debug level
    sys.stdout = previous_stdout
    alert_engine.CFG_WEBALERT_DEBUG_LEVEL = previous_cfg_webalert_debug_level

    # Remove test alerts
    for query_id in self.added_query_ids:
        run_sql('DELETE FROM user_query_basket WHERE id_query=%s',
                (query_id,))
        run_sql('DELETE FROM user_query WHERE id_query=%s', (query_id,))
        run_sql('DELETE FROM query WHERE id=%s', (query_id,))

    # Identify alerts, organize by name (hopefully unique for
    # these tests)
    self.alerts = {}
    for alert_message in alert_output.getvalue().split("+" * 80 + '\n'):
        if 'alert name: ' in alert_message:
            alert_name = alert_message.split('alert name: ')[1].split('\n')[0]
            self.alerts[alert_name] = alert_message