Example #1
0
def write_csv(req, dictionary, journal_list, f_date, t_date,
              created_or_modified_date):
    """Return a semicolon-separated CSV report of each journal's papers.

    @param dictionary: maps journal name -> collection name
    @param journal_list: journal names (keys of dictionary) to report on
    @param f_date: start date of the search window
    @param t_date: end date of the search window
    @param created_or_modified_date: which record date the window applies
        to (interpolated into the 'date...' search pattern)
    @return: the whole report as one string ('' when nothing matches)
    """

    def _strip_bold(text):
        # Remove an exact '<b>...</b>' wrapper.  The original used
        # lstrip('<b>') / rstrip('</b>'), which strip *characters* from a
        # set rather than a prefix/suffix, silently mangling values that
        # merely start or end with 'b', '<', '>' or '/'.
        if text.startswith('<b>'):
            text = text[3:]
        if text.endswith('</b>'):
            text = text[:-4]
        return text

    return_val = ''

    for key in journal_list:
        val = dictionary[key]
        papers = perform_request_search(p="date%s:%s->%s"
                                        % (created_or_modified_date,
                                           f_date, t_date),
                                        c=val)

        if not papers:
            continue

        # Journal name followed by the CSV header row.
        return_val += key + '\n'
        return_val += ';'.join(['recid', 'cr. date', 'mod. date', 'DOI',
                                'XML', 'PDF', 'PDF/A', 'Complete record?',
                                'arXiv number', 'Copyright: authors', 'CC-BY',
                                'Funded by SCOAP3', 'arXiv category', 'notes',
                                'First delivery', 'First AB delivery',
                                'Last modification', 'PDF/A upload',
                                'DOI registration', 'Delivery diff',
                                'PDF/A diff']) + '\n'

        for recid in papers:
            rec = get_record(recid)
            doi = get_doi(rec)
            # The original pre-initialized these five names to None and
            # immediately overwrote them here -- dead code removed.
            first_del, first_ab_del, last_mod, doi_reg, pdfa_del = \
                get_delivery_data(recid, doi)

            record_compl = is_complete_record(recid)
            return_val += ';'.join(str(item) for item in [
                recid,
                get_creation_date(recid),
                get_modification_date(recid),
                doi,
                _strip_bold(has_or_had_format(recid, '.xml')),
                _strip_bold(has_or_had_format(recid, '.pdf')),
                _strip_bold(has_or_had_format(recid, '.pdf;pdfa')),
                check_complete_rec(record_compl),
                _strip_bold(get_arxiv(rec)),
                _strip_bold(is_compliant(recid, 'authors')),
                _strip_bold(is_compliant(recid, 'cc')),
                _strip_bold(is_compliant(recid, 'scoap3')),
                _strip_bold(is_compliant(recid, 'category')),
                # list of the checks that failed for this record:
                [rec_key for rec_key, rec_val
                 in record_compl.iteritems() if not rec_val],
                first_del,
                first_ab_del,
                last_mod,
                pdfa_del,
                doi_reg,
                check_24h_delivery(first_del, doi_reg),
                check_24h_delivery(pdfa_del, doi_reg),
            ])
            return_val += '\n'

    return return_val
Example #2
0
def index(req):
    """Stream a CSV of basic metadata for every record of every journal.

    Columns: record id, journal, creation date, modification date, title,
    authors, publication info.  Served to the client as an attachment.

    @param req: mod_python-style request object (written to directly)
    """
    req.content_type = 'text/csv; charset=utf-8'
    req.headers_out['content-disposition'] = ('attachment; filename=scoap3_records_info.csv')

    req.write("SCOAP3 record id; Journal; Creation date; Modification date; Title; Authors; Publication info\n")
    for key, value in JOURNALS.iteritems():
        recids = perform_request_search(c=value)
        for recid in recids:
            rec = get_record(recid)
            # Guard the 245 (title) and 100 (first author) lookups: the
            # original indexed them unconditionally, so a record missing
            # either field raised KeyError and aborted the export mid-stream.
            title = ''
            if '245' in rec:
                title = rec['245'][0][0][0][1].strip()
            creation_date = get_creation_date(recid)
            modification_date = get_modification_date(recid)
            authors = ''
            if '100' in rec:
                authors = rec['100'][0][0][0][1]
            if '700' in rec:
                for author in rec['700']:
                    authors += ' / %s' % (author[0][0][1])
            publication_info = ''
            if '733' in rec:
                # journal title, volume, year, pages
                publication_info += "%s %s (%s) %s" % (rec['733'][0][0][0][1], rec['733'][0][0][1][1], rec['733'][0][0][2][1], rec['733'][0][0][3][1])
            if '024' in rec:
                publication_info += " %s" % (rec['024'][0][0][0][1],)
            if '037' in rec:
                publication_info += " %s" % (rec['037'][0][0][0][1],)

            # NOTE(review): assumes the request object provides writeline()
            # as in the original -- confirm against the deployment's req API.
            req.writeline("%s; %s; %s; %s; %s; %s; %s\n" % (recid,
                                                            value,
                                                            creation_date,
                                                            modification_date,
                                                            title,
                                                            authors,
                                                            publication_info))
Example #3
0
    def create_latest_additions_info(self, rg=CFG_WEBSEARCH_INSTANT_BROWSE, ln=CFG_SITE_LANG):
        """
        Populate self.latest_additions_info with the last `rg` records of
        this collection (newest first), ready for create_instant_browse().
        """
        self.latest_additions_info = []
        if not (self.nbrecs and self.reclist):
            return

        recIDs = list(self.reclist)

        # FIXME: temporary hack in order to display tweaked latest
        # additions box for some CERN collections:
        if CFG_CERN_SITE:
            this_year = time.strftime("%Y", time.localtime())
            if self.name in ['CERN Yellow Reports']:
                last_year = str(int(this_year) - 1)
                # keep only records from this and the previous year:
                pattern = 'year:%s or year:%s' % (this_year, last_year)
                recIDs = list(self.reclist & search_pattern(p=pattern))
            elif self.name in ['Videos']:
                # keep only records from this year:
                recIDs = list(self.reclist &
                              search_pattern(p='year:%s' % this_year))

        count = min(rg, len(recIDs))
        # take the trailing `count` recIDs and walk them newest-first:
        for recid in reversed(recIDs[len(recIDs) - count:]):
            self.latest_additions_info.append(
                {'id': recid,
                 'format': format_record(recid, "hb", ln=ln),
                 'date': get_creation_date(recid, fmt="%Y-%m-%d<br />%H:%i")})
        return
Example #4
0
def get_record_checks(req, recids):
    """Render HTML table rows with compliance checks for the given records.

    @param recids: comma-separated string mixing numeric record ids and
        free-text section titles; numeric entries become data rows, while
        any non-numeric entry (or a record whose data cannot be fetched)
        becomes a section header followed by a column-title row.
    @return: concatenated HTML string ('' for empty input)
    """
    if recids == '':
        return ''

    recids = recids.split(',')
    return_val = []
    for rid in recids:
        try:
            recid = int(rid)
            rec = get_record(recid)
            doi = get_doi(rec)
            record_compl = is_complete_record(recid)
            return_val.append("""<tr>
                <td><a href="%s">%i</a></td>
                <td>%s</td>
                <td>%s</td>
                <td><a href="http://dx.doi.org/%s">%s</a></td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
            </tr>""" % (join(CFG_SITE_URL, 'record', str(recid)), recid,
                        get_creation_date(recid),
                        get_modification_date(recid),
                        doi, doi,
                        has_or_had_format(recid, '.xml'),
                        has_or_had_format(recid, '.pdf'),
                        has_or_had_format(recid, '.pdf;pdfa'),
                        check_complete_rec(record_compl),
                        get_arxiv(rec),
                        is_compliant(recid, "authors"),
                        is_compliant(recid, "cc"),
                        is_compliant(recid, "scoap3"),
                        str([rec_key for rec_key, rec_val
                             in record_compl.iteritems() if not rec_val])))
        except Exception:
            # A non-numeric `rid` (a journal/section title), or any failure
            # while fetching the record's data, falls back to emitting a
            # section header plus the column-title row.  Narrowed from the
            # original bare `except:` so KeyboardInterrupt and SystemExit
            # are no longer swallowed.
            recid = rid
            return_val.append("""<tr><th colspan="13" align="left">
                               <h2>%s</h2></th></tr>""" % (recid,))
            return_val.append("""<tr>
                <th>recid</th>
                <th>cr. date</th>
                <th>mod. date</th>
                <th>DOI</th>
                <th>XML</th>
                <th>PDF</th>
                <th>PDF/A</th>
                <th>Complete record?</th>
                <th>arXiv number</th>
                <th>Copyright: authors</th>
                <th>CC-BY</th>
                <th>Funded by SCOAP3</th>
                <th>notes</th>
            </tr>""")
    return ''.join(return_val)
Example #5
0
def write_csv(req, dictionary, journal_list, f_date, t_date,
              created_or_modified_date):
    """Return a semicolon-separated CSV report of each journal's papers.

    @param dictionary: maps journal name -> collection name
    @param journal_list: journal names (keys of dictionary) to report on
    @param f_date: start date of the search window
    @param t_date: end date of the search window
    @param created_or_modified_date: which record date the window applies
        to (interpolated into the 'date...' search pattern)
    @return: the whole report as one string ('' when nothing matches)
    """

    def _strip_bold(text):
        # Remove an exact '<b>...</b>' wrapper.  The original used
        # lstrip('<b>') / rstrip('</b>'), which strip *characters* from a
        # set rather than a prefix/suffix, silently mangling values that
        # merely start or end with 'b', '<', '>' or '/'.
        if text.startswith('<b>'):
            text = text[3:]
        if text.endswith('</b>'):
            text = text[:-4]
        return text

    return_val = ''

    for key in journal_list:
        val = dictionary[key]
        papers = perform_request_search(p="date%s:%s->%s"
                                        % (created_or_modified_date,
                                           f_date, t_date),
                                        c=val)

        if not papers:
            continue

        # Journal name followed by the CSV header row.
        return_val += key + '\n'
        return_val += ';'.join(['recid', 'cr. date', 'mod. date', 'DOI',
                                'XML', 'PDF', 'PDF/A', 'Complete record?',
                                'arXiv number', 'Copyright: authors', 'CC-BY',
                                'Funded by SCOAP3', 'arXiv category', 'notes',
                                'First delivery', 'First AB delivery',
                                'Last modification', 'PDF/A upload',
                                'DOI registration', 'Delivery diff',
                                'PDF/A diff']) + '\n'

        for recid in papers:
            rec = get_record(recid)
            doi = get_doi(rec)
            # The original pre-initialized these five names to None and
            # immediately overwrote them here -- dead code removed.
            first_del, first_ab_del, last_mod, doi_reg, pdfa_del = \
                get_delivery_data(recid, doi)

            record_compl = is_complete_record(recid)
            return_val += ';'.join(str(item) for item in [
                recid,
                get_creation_date(recid),
                get_modification_date(recid),
                doi,
                _strip_bold(has_or_had_format(recid, '.xml')),
                _strip_bold(has_or_had_format(recid, '.pdf')),
                _strip_bold(has_or_had_format(recid, '.pdf;pdfa')),
                check_complete_rec(record_compl),
                _strip_bold(get_arxiv(rec)),
                _strip_bold(is_compliant(recid, 'authors')),
                _strip_bold(is_compliant(recid, 'cc')),
                _strip_bold(is_compliant(recid, 'scoap3')),
                _strip_bold(is_compliant(recid, 'category')),
                # list of the checks that failed for this record:
                [rec_key for rec_key, rec_val
                 in record_compl.iteritems() if not rec_val],
                first_del,
                first_ab_del,
                last_mod,
                pdfa_del,
                doi_reg,
                check_24h_delivery(first_del, doi_reg),
                check_24h_delivery(pdfa_del, doi_reg),
            ])
            return_val += '\n'

    return return_val
Example #6
0
def get_list():
    papers = []
    prev_version = perform_request_search()

    for recid in prev_version:
        rec = get_record(recid)
        doi = None
        arxiv_id = None
        try:
            if ('2', 'DOI') in rec['024'][0][0]:
                for t in rec['024'][0][0]:
                    if 'a' in t:
                        doi = t[1]
                if not doi:
                    print "No DOI for record: %i" % (recid, )
            else:
                print "No DOI for record: %i" % (recid, )
        except:
            print "No DOI for record: %i" % (recid, )

        checksum, url, url_type = get_pdf(recid)

        if '037' in rec.keys():
            if ('9', 'arXiv') in rec.get('037')[0][0]:
                for t in rec.get('037')[0][0]:
                    if 'a' in t:
                        arxiv_id = t[1]

        papers.append((recid, arxiv_id, get_creation_date(recid), checksum, url, url_type, doi))
    return papers
Example #7
0
def national_authors_list(req, search_country):
    """Stream a CSV of authors affiliated with `search_country`.

    One row per matching author; the first author row of each record also
    carries the record-level columns (recid, title, dates, publisher, ...).

    @param req: mod_python-style request object (written to directly)
    @param search_country: country substring matched against 100/700 $w
    """
    req.content_type = 'text/csv; charset=utf-8'
    req.headers_out['content-disposition'] = (
        'attachment; '
        'filename=national_authors_list.csv')
    ids = perform_request_search(p="country:'%s'" % (search_country, ))
    req.write(
        "#;RECID;Title;Creation date;Publisher;Total # of authors;Authors name(given country only);Authors country;Authors affiliations\n"
    )

    for number, recid in enumerate(ids):
        # Fetch the record once and reuse it; the original called
        # get_record() three times per record.
        rec = get_record(recid)
        title = record_get_field_value(rec, '245', code="a")
        del_date = get_creation_date(recid)
        publisher = record_get_field_value(rec, '980', code="b")

        authors = []
        author_count = 0
        for f in ['100', '700']:
            if f in rec:
                for auth in rec[f]:
                    author_count += 1
                    aff = ''
                    name = ''
                    country = ''
                    hit = 0
                    for subfield, value in auth[0]:
                        if subfield == 'a':
                            name = value
                        if subfield in ['v', 'u']:
                            # accumulate affiliations, comma-separated
                            if aff:
                                aff += ', ' + value
                            else:
                                aff = value
                        if subfield == 'w':
                            # accumulate countries, comma-separated
                            if country:
                                country += ', ' + value
                            else:
                                country = value
                            if search_country in value:
                                hit = 1

                    if hit:
                        authors.append({
                            'name': name,
                            'affiliation': aff.replace('\n', ''),
                            'country': country
                        })

        for i, author in enumerate(authors):
            if i == 0:
                # First author row carries the record-level columns too.
                req.write("%s;%s;%s;%s;%s;%s;%s;%s;%s\n" %
                          (number + 1, recid, title.replace('\n', ''),
                           del_date, publisher, author_count, author['name'],
                           author['country'], author['affiliation']))
            else:
                req.write(
                    ";;;;;;%s;%s;%s\n" %
                    (author['name'], author['country'], author['affiliation']))
    def create_latest_additions_info(self, rg=CFG_WEBSEARCH_INSTANT_BROWSE, ln=CFG_SITE_LANG):
        """
        Create info about latest additions that will be used for
        create_instant_browse() later.

        Fills self.latest_additions_info with dicts of the form
        {'id': recid, 'format': <formatted record>, 'date': <creation date>}
        for the last `rg` records of this collection, newest first.

        @param rg: maximum number of latest additions to collect
        @param ln: language passed to format_record()
        """
        self.latest_additions_info = []
        if self.nbrecs and self.reclist:
            # firstly, get last 'rg' records:
            recIDs = list(self.reclist)
            # default output format for format_record() below:
            of = 'hb'
            # CERN hack begins: tweak latest additions for selected collections:
            if CFG_CERN_SITE:
                # alter recIDs list for some CERN collections:
                this_year = time.strftime("%Y", time.localtime())
                if self.name in ['CERN Yellow Reports','Videos']:
                    last_year = str(int(this_year) - 1)
                    # detect recIDs only from this and past year:
                    recIDs = list(self.reclist & \
                                  search_pattern_parenthesised(p='year:%s or year:%s' % \
                                                 (this_year, last_year)))
                elif self.name in ['VideosXXX']:
                    # detect recIDs only from this year:
                    recIDs = list(self.reclist & \
                                  search_pattern_parenthesised(p='year:%s' % this_year))
                elif self.name == 'CMS Physics Analysis Summaries' and \
                         1281585 in self.reclist:
                    # REALLY, REALLY temporary hack
                    recIDs = list(self.reclist)
                    recIDs.remove(1281585)
                # apply special filters:
                if self.name in ['Videos']:
                    # select only videos with movies:
                    recIDs = list(intbitset(recIDs) & \
                                  search_pattern_parenthesised(p='collection:"PUBLVIDEOMOVIE"'))
                    # and switch to the video output format:
                    of = 'hvp'
                # sort some CERN collections specially:
                # (sort key is MARC field 269__c)
                if self.name in ['Videos',
                                 'Video Clips',
                                 'Video Movies',
                                 'Video News',
                                 'Video Rushes',
                                 'Webcast',
                                 'ATLAS Videos',
                                 'Restricted Video Movies',
                                 'Restricted Video Rushes',
                                 'LHC First Beam Videos',
                                 'CERN openlab Videos']:
                    recIDs = sort_records(None, recIDs, '269__c')
            # CERN hack ends.

            total = len(recIDs)
            to_display = min(rg, total)

            # walk the last `to_display` recIDs from newest to oldest:
            for idx in range(total-1, total-to_display-1, -1):
                recid = recIDs[idx]
                self.latest_additions_info.append({'id': recid,
                                                   'format': format_record(recid, of, ln=ln),
                                                   'date': get_creation_date(recid, fmt="%Y-%m-%d<br />%H:%i")})
        return
def format(bfo, format='%Y-%m-%d'):
    '''
    Get the record creation date.
    @param format: The date format in MySQL syntax
    '''
    return get_creation_date(bfo.recID, format)
Example #10
0
def format_element(bfo, format='%Y-%m-%d'):
    '''
    Get the record creation date.
    @param format: The date format in MySQL syntax
    '''
    record_id = bfo.recID
    creation_date = get_creation_date(record_id, format)
    return creation_date
Example #11
0
def format_element(bfo, format="%Y-%m-%d"):
    """
    Get the record creation date.
    @param format: The date format in MySQL syntax
    """
    return get_creation_date(bfo.recID, format)
    def create_latest_additions_info(self, rg=CFG_WEBSEARCH_INSTANT_BROWSE, ln=CFG_SITE_LANG):
        """
        Create info about latest additions that will be used for
        create_instant_browse() later.

        Fills self.latest_additions_info with dicts of the form
        {'id': recid, 'format': <formatted record>, 'date': <creation date>}
        for the last `rg` records of this collection, newest first.

        @param rg: maximum number of latest additions to collect
        @param ln: language passed to format_record()
        """
        self.latest_additions_info = []
        if self.nbrecs and self.reclist:
            # firstly, get last 'rg' records:
            recIDs = list(self.reclist)

            # CERN hack begins: tweak latest additions for selected collections:
            if CFG_CERN_SITE:
                # alter recIDs list for some CERN collections:
                this_year = time.strftime("%Y", time.localtime())
                if self.name in ['CERN Yellow Reports','Videos']:
                    last_year = str(int(this_year) - 1)
                    # detect recIDs only from this and past year:
                    recIDs = list(self.reclist & \
                                  search_pattern(p='year:%s or year:%s' % \
                                                 (this_year, last_year)))
                elif self.name in ['VideosXXX']:
                    # detect recIDs only from this year:
                    recIDs = list(self.reclist & \
                                  search_pattern(p='year:%s' % this_year))
                elif self.name == 'CMS Physics Analysis Summaries' and \
                         1281585 in self.reclist:
                    # REALLY, REALLY temporary hack
                    recIDs = list(self.reclist)
                    recIDs.remove(1281585)
                # apply special filters:
                if self.name in ['Videos']:
                    # select only videos with movies:
                    recIDs = list(intbitset(recIDs) & \
                                  search_pattern(p='collection:"PUBLVIDEOMOVIE"'))
                # sort some CERN collections specially:
                # (sort key is MARC field 269__c)
                if self.name in ['Videos',
                                 'Video Clips',
                                 'Video Movies',
                                 'Video News',
                                 'Video Rushes',
                                 'Webcast',
                                 'ATLAS Videos',
                                 'Restricted Video Movies',
                                 'Restricted Video Rushes',
                                 'LHC First Beam Videos',
                                 'CERN openlab Videos']:
                    recIDs = sort_records(None, recIDs, '269__c')
            # CERN hack ends.

            total = len(recIDs)
            to_display = min(rg, total)

            # walk the last `to_display` recIDs from newest to oldest:
            for idx in range(total-1, total-to_display-1, -1):
                recid = recIDs[idx]
                self.latest_additions_info.append({'id': recid,
                                                   'format': format_record(recid, "hb", ln=ln),
                                                   'date': get_creation_date(recid, fmt="%Y-%m-%d<br />%H:%i")})
        return
Example #13
0
def get_general_delivery(recid, doi):
    """Fallback delivery data derived from the record itself.

    @param recid: record id used for creation/modification dates
    @param doi: DOI looked up in the `doi` table
    @return: (first_del, first_ab_del, last_mod, doi_reg, pdfa_del)
        tuple, or None when the DOI is not registered
    """
    # Parameterized query: the original interpolated `doi` straight into
    # the SQL string, which breaks on quotes and is an injection risk.
    delivery_data = run_sql("select * from doi where doi=%s", (doi,))
    if not delivery_data:
        return None
    return (get_creation_date(recid),      # first_del
            None,                          # first_ab_del (not tracked here)
            get_modification_date(recid),  # last_mod
            delivery_data[0][1],           # doi_reg
            None)                          # pdfa_del (not tracked here)
Example #14
0
def get_general_delivery(recid, doi):
    """Fallback delivery data derived from the record itself.

    @param recid: record id used for creation/modification dates
    @param doi: DOI looked up in the `doi` table
    @return: (first_del, first_ab_del, last_mod, doi_reg, pdfa_del)
        tuple, or None when the DOI is not registered
    """
    # Parameterized query: the original interpolated `doi` straight into
    # the SQL string, which breaks on quotes and is an injection risk.
    delivery_data = run_sql("select * from doi where doi=%s", (doi,))
    if not delivery_data:
        return None
    return (get_creation_date(recid),      # first_del
            None,                          # first_ab_del (not tracked here)
            get_modification_date(recid),  # last_mod
            delivery_data[0][1],           # doi_reg
            None)                          # pdfa_del (not tracked here)
Example #15
0
def national_authors_list(req, search_country):
    """Stream a pipe-separated listing of authors from `search_country`.

    One row per matching author; the first author row of each record also
    carries the record-level columns (recid, DOI, journal, title, dates,
    publisher, author count).

    @param req: mod_python-style request object (written to directly)
    @param search_country: country substring matched against 100/700 $w
    """
    req.content_type = 'text/csv; charset=utf-8'
    req.headers_out['content-disposition'] = ('attachment; '
                                              'filename=national_authors_list.csv')
    ids = perform_request_search(p="country:'%s'" % (search_country,))
    req.write("#;RECID;Title;Creation date;Publisher;Total # of authors;Authors name(given country only);Authors country;Authors affiliations\n")

    for number, recid in enumerate(ids):
        # Fetch the record once and reuse it; the original called
        # get_record() up to six times per record.
        rec = get_record(recid)
        doi = record_get_field_value(rec, '024', ind1="7", code="a")
        journal = record_get_field_value(rec, '773', code="p")
        title = record_get_field_value(rec, '245', code="a")
        del_date = get_creation_date(recid)
        publisher = record_get_field_value(rec, '980', code="b")
        if not publisher:
            publisher = record_get_field_value(rec, '541', code="a")

        authors = []
        author_count = 0
        for f in ['100', '700']:
            if f in rec:
                for auth in rec[f]:
                    author_count += 1
                    aff = ''
                    name = ''
                    country = ''
                    hit = 0
                    for subfield, value in auth[0]:
                        if subfield == 'a':
                            name = value
                        if subfield in ['v', 'u']:
                            # accumulate affiliations, comma-separated
                            if aff:
                                aff += ', ' + value
                            else:
                                aff = value
                        if subfield == 'w':
                            # accumulate countries, comma-separated
                            if country:
                                country += ', ' + value
                            else:
                                country = value
                            if search_country in value:
                                hit = 1

                    if hit:
                        authors.append({'name': name,
                                        'affiliation': aff.replace('\n',''),
                                        'country': country})

        for i, author in enumerate(authors):
            if i == 0:
                # Bug fix: the 11-placeholder format string previously
                # received only 9 values (`doi` and `journal` were computed
                # but never passed), raising TypeError on the first
                # matching record.
                req.write("%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s\n" % (
                    number + 1, recid, doi, journal,
                    title.replace('\n', ''), del_date, publisher,
                    author_count, author['name'], author['country'],
                    author['affiliation']))
            else:
                req.write("||||||||%s|%s|%s\n" % (author['name'], author['country'], author['affiliation']))
Example #16
0
def generate_mediaexport_album(recid, resource_id, json_format=True):
    """Return the media-export 'album' entry for a record.

    :param str recid: The record id.
    :param str resource_id: The report number.
    :param bool json_format: If true, returns JSON dump, otherwise a dictionary
    """
    # Early out: without a JSON encoder the original returned '' anyway,
    # but only after building the whole structure.
    if not CFG_JSON_AVAILABLE:
        return ''

    # Fields that are required
    MEDIA_CONFIG = {
        'title_en': ('245', ' ', ' ', 'a'),
        'title_fr': ('246', ' ', '1', 'a'),
    }
    bibarchive = BibRecDocs(recid)
    bibarchive_with_deleted = BibRecDocs(recid, deleted_too=True)
    bibdocs = bibarchive.list_bibdocs()
    doc_numbers = [(bibdoc.get_id(), bibdoc.get_docname(), bibdoc)
                   for bibdoc in bibarchive_with_deleted.list_bibdocs()]
    doc_numbers.sort()
    # Get the record
    record = get_record(recid)
    # Build the response
    entry = {}

    for key in MEDIA_CONFIG:
        entry[key] = record_get_field_value(record, *MEDIA_CONFIG[key])

    entry['id'] = resource_id
    entry['record_id'] = str(recid)
    entry['entry_date'] = get_creation_date(recid)
    entry['total'] = len(bibdocs)  # count of non-deleted documents only
    entry['type'] = 'album'
    entry['images'] = []

    # For each non-deleted doc create the corresponding report number.
    # enumerate() replaces the original doc_numbers.index() call inside the
    # loop (O(n^2) -> O(n)); the positions are identical because the list is
    # iterated in sorted order and document ids are unique.
    for bibdoc_number, (docid, docname, bibdoc) in enumerate(doc_numbers, 1):
        if not bibdoc.deleted_p():
            image = generate_mediaexport(recid, True, resource_id,
                                         bibdoc_number, False)
            image['tirage_id'] = bibdoc_number
            image['id'] = '{0}-{1}'.format(image['id'], bibdoc_number)
            entry['images'].append(image)

    final = {'entries': [{'entry': entry}]}

    if json_format:
        return json.dumps(final)
    else:
        return final
Example #17
0
def format_element(bfo, format='%Y-%m-%d', date_format='%Y-%m-%d'):
    '''
    Get the record creation date.
    <b>Note:</b> parameter <code>format</code> is deprecated

    @param date_format: The date format in MySQL syntax
    '''
    # Honour the deprecated `format` parameter, but only when `date_format`
    # was left at its default and `format` was explicitly changed.
    use_legacy = (date_format == '%Y-%m-%d' and format != '%Y-%m-%d')
    effective_format = format if use_legacy else date_format
    return get_creation_date(bfo.recID, effective_format)
Example #18
0
def format_element(bfo, format='%Y-%m-%d', date_format='%Y-%m-%d'):
    '''
    Get the record creation date.
    <b>Note:</b> parameter <code>format</code> is deprecated

    @param date_format: The date format in MySQL syntax
    '''
    record_id = bfo.recID
    # Backward compatibility: fall back to the deprecated `format` argument
    # when it was changed while `date_format` kept its default.
    if format != '%Y-%m-%d' and date_format == '%Y-%m-%d':
        date_format = format
    return get_creation_date(record_id, date_format)
def format_element(bfo, format='%Y-%m-%d', date_format='%Y-%m-%d'):
    '''
    Get the record modification date.
    <b>Note:</b> parameter <code>format</code> is deprecated

    @param date_format: The date format in MySQL syntax
    '''
    _ = gettext_set_language(bfo.lang)
    record_id = bfo.recID
    # Both dates are always rendered in ISO form, regardless of the
    # (deprecated / unused) format parameters.
    dates = {
        'creation_date': get_creation_date(record_id, "%Y-%m-%d"),
        'modification_date': get_modification_date(record_id, "%Y-%m-%d"),
    }
    return _(
        "Record created on %(creation_date)s, modified on %(modification_date)s"
    ) % dates
Example #20
0
def bst_get_new_ilo_publications(number_results_to_display=5):
    """
    Bibtasklet responsible of the generation of the list
    containing the most recent ILO publications and to update
    automatically the query to get the most recent ILO publications.
    @param number_results_to_display: number of results to display
    to users in main page.
    """

    task_update_progress(
        "Start updating query for collection Latest publications by ILO")
    # Current month plus the two previous ones, each paired with its OWN
    # year.  The original hard-coded "2013-" for all three patterns, which
    # was wrong in every other year and produced future-month patterns at
    # each January rollover.
    now = datetime.datetime.now()
    months = [now,
              now + dateutil.relativedelta.relativedelta(months=-1),
              now + dateutil.relativedelta.relativedelta(months=-2)]
    # NOTE(review): months are deliberately left un-padded ("%s"), exactly
    # as in the original query -- confirm against the 946__d storage format
    # before switching to zero-padded months.
    date_clause = ' or '.join('946__d:%s-%s-*' % (d.year, d.month)
                              for d in months)
    # Current and previous publication year (was hard-coded 2012/2013).
    year_clause = '997__a:%s or 997__a:%s' % (now.year - 1, now.year)
    dbquery = ('(%s) and (%s) and (992__a:"ILO publication") '
               'not callno:GB.* not callno:NYP'
               % (date_clause, year_clause))
    # update also tab collection where id=113; parameterized to avoid
    # quoting problems (dbquery itself contains quote characters).
    run_sql("update collection set dbquery=%s where id=113", (dbquery,))
    task_update_progress(
        "Finished updating query for collection Latest publications by ILO")

    task_update_progress("Start calculating new ILO publications")
    ILO_publications_recids = perform_request_search(p='%s' % dbquery)
    dict_creation_date_per_recid = {}
    for recid in ILO_publications_recids:
        dict_creation_date_per_recid[recid] = get_creation_date(
            recid, fmt="%Y-%m-%d %H:%i:%S")

    # Keep the `number_results_to_display` most recently created records,
    # newest first.
    sorted_dict_creation_date_per_recid = sorted(
        dict_creation_date_per_recid.items(), key=lambda x: x[1])
    new_ilo_publications = sorted_dict_creation_date_per_recid[
        -int(number_results_to_display):]
    new_ilo_publications.reverse()
    new_ilo_publications_recids = [t[0] for t in new_ilo_publications]
    # `with` guarantees the file is closed even if the write fails.
    with open(CFG_TMPDIR + "/new_ILO_publications", "w") as fh:
        fh.write(repr(new_ilo_publications_recids))
    task_update_progress("Finished calculating new ILO publications")

    return dbquery
Example #21
0
def get_delivery_data(recid, doi):
    """Collect delivery milestones for a record.

    Tries the detailed per-package data first, then the general fallback.
    @return: 6-tuple (first_del, first_ab_del, last_mod, doi_reg,
        pdfa_del, pub_date); all None when nothing is known for `doi`.
    """
    detailed = get_detiled_package_delivery(doi)
    if detailed:
        # Fall back to the record creation date when the detailed data
        # lacks a first-delivery timestamp.
        first_del = detailed[0] or get_creation_date(recid)
        return (first_del, detailed[1], detailed[2],
                detailed[3], detailed[4], detailed[5])

    general = get_general_delivery(recid, doi)
    if general:
        return (general[0], general[1], general[2],
                general[3], general[4], general[5])

    return (None, None, None, None, None, None)
Example #22
0
def write_csv(req, dictionary, journal_list, f_date, t_date,
              created_or_modified_date):
    """Build a comma-separated compliance report for the given journals.

    For each journal key in `journal_list` (mapped to a collection via
    `dictionary`), lists records created/modified between `f_date` and
    `t_date` with their available formats, DOI/arXiv info and compliance
    flags.  `created_or_modified_date` selects the searched date index
    ('created' or 'modified').  Returns the whole report as one string;
    `req` is unused here but kept for the web-handler signature.
    """
    return_val = ''

    for key in journal_list:
        val = dictionary[key]
        papers = perform_request_search(p="date%s:%s->%s"
                                        % (created_or_modified_date,
                                           f_date, t_date),
                                        c=val)

        # Skip journals with no hits in the requested date range.
        if not papers:
            continue

        # Journal name on its own line, then the column header row.
        # (Fix: the newline after `key` was missing, gluing the journal
        # name and the header onto a single CSV line.)
        return_val += key + '\n'
        return_val += ','.join(['recid', 'cr. date', 'mod. date', 'DOI',
                                'XML', 'PDF', 'PDF/A', 'Complete record?',
                                'arXiv number', 'Copyright: authors', 'CC-BY',
                                'Funded by SCOAP3', 'notes']) + '\n'

        for recid in papers:
            rec = get_record(recid)
            doi = get_doi(rec)
            record_compl = is_complete_record(recid)
            # NOTE: lstrip/rstrip take a character *set*, not a prefix; this
            # only works because the helpers wrap values in literal <b>...</b>.
            return_val += ','.join(str(item) for item in [str(recid),
                                   get_creation_date(recid),
                                   get_modification_date(recid),
                                   doi,
                                   has_or_had_format(recid, '.xml').lstrip('<b>').rstrip('</b>'),
                                   has_or_had_format(recid, '.pdf').lstrip('<b>').rstrip('</b>'),
                                   has_or_had_format(recid, '.pdf;pdfa').lstrip('<b>').rstrip('</b>'),
                                   str(check_complete_rec(record_compl)),
                                   get_arxiv(rec).lstrip('<b>').rstrip('</b>'),
                                   is_compliant(recid, 'authors').lstrip('<b>').rstrip('</b>'),
                                   is_compliant(recid, 'cc').lstrip('<b>').rstrip('</b>'),
                                   is_compliant(recid, 'scoap3').lstrip('<b>').rstrip('</b>'),
                                   str([rec_key for rec_key, rec_val in record_compl.iteritems() if not rec_val])])
            return_val += '\n'

    return return_val
Example #23
0
def get_delivery_data(recid, doi):
    """Resolve delivery/registration timestamps for a record.

    Returns a 6-tuple
    ``(first_del, first_ab_del, last_mod, doi_reg, pdfa_del, pub_date)``;
    every slot is ``None`` when neither lookup yields data.
    """
    # Defaults returned when no delivery information can be found.
    first_del = None
    first_ab_del = None
    last_mod = None
    doi_reg = None
    pdfa_del = None
    pub_date = None

    # Detailed, per-package data takes precedence.
    # (NOTE: the upstream helper's name carries the 'detiled' typo.)
    delivery_data = get_detiled_package_delivery(doi)
    if delivery_data:
        tmp = delivery_data[0]
        if not tmp:
            # Fall back to the record creation date when the first
            # delivery timestamp is missing.
            tmp = get_creation_date(recid)
        return tmp, delivery_data[1], delivery_data[2], delivery_data[
            3], delivery_data[4], delivery_data[5]
    else:
        # Otherwise try the coarser, general delivery lookup.
        delivery_data = get_general_delivery(recid, doi)
        if delivery_data:
            return delivery_data[0], delivery_data[1], delivery_data[
                2], delivery_data[3], delivery_data[4], delivery_data[5]

    return first_del, first_ab_del, last_mod, doi_reg, pdfa_del, pub_date
def index(req):
    """Stream a CSV of basic metadata for every record of every journal.

    Sends one semicolon-separated row per record (id, journal, creation and
    modification dates, title, authors, publication info) as an attachment.
    """
    req.content_type = 'text/csv; charset=utf-8'
    req.headers_out['content-disposition'] = (
        'attachment; filename=scoap3_records_info.csv')

    req.write(
        "SCOAP3 record id; Journal; Creation date; Modification date; Title; Authors; Publication info\n"
    )
    for collection in JOURNALS.itervalues():
        for recid in perform_request_search(c=collection):
            rec = get_record(recid)

            # Title from 245$a when present.
            title = rec['245'][0][0][0][1].strip() if '245' in rec else ""

            created = get_creation_date(recid)
            modified = get_modification_date(recid)

            # First author from 100$a, co-authors from each 700 field.
            authors = rec['100'][0][0][0][1] if '100' in rec else ""
            if '700' in rec:
                for coauthor in rec['700']:
                    authors += ' / %s' % (coauthor[0][0][1])

            # Journal reference (733), then DOI (024) and report no. (037).
            pub_info = ''
            if '733' in rec:
                subs = rec['733'][0][0]
                pub_info += "%s %s (%s) %s" % (
                    subs[0][1], subs[1][1], subs[2][1], subs[3][1])
            if '024' in rec:
                pub_info += " %s" % (rec['024'][0][0][0][1], )
            if '037' in rec:
                pub_info += " %s" % (rec['037'][0][0][0][1], )

            req.write("%s; %s; %s; %s; %s; %s; %s\n" %
                      (recid, collection, created, modified, title,
                       authors, pub_info))
def check_records(records):
    """Amend the 773 (journal reference) field of each record in place.

    For every record: fill an empty 773__y (year) with the record's
    creation year, delete 773 subfields whose value is empty or '-', and
    log a message when the record has no 773 field at all.

    `records` appears to be an iterable of BibCheck-style amendable record
    objects (they expose iterfield/amend_field/delete_field) — confirm
    against the calling plugin framework.
    """
    for record in records:
        # adds missing data in year field
        year = record_get_field_value(record, '773', code='y')
        if not year:
            # Use the record's creation year; 001 holds the record id.
            for position, value in record.iterfield('773__y'):
                record.amend_field(
                    position,
                    get_creation_date(record_get_field_value(record, '001'),
                                      '%Y'))

        # remove empty subfields
        if '773' in record:
            for subfield, value in record['773'][0][0]:
                if not value or value == '-':
                    # NOTE(review): 'Deleteing' typo is in the audit
                    # message (runtime string) — left untouched here.
                    for position, val in record.iterfield('773__%s' %
                                                          (subfield, )):
                        record.delete_field(
                            position,
                            'Deleteing empty field: %s' % (subfield, ))
        else:
            write_message("Missing 773 field in record %s" %
                          record_get_field_value(record, '001'))
Example #26
0
def generate_mediaexport(recid, is_image, resource_id, tirage, wrapped, json_format=True):
    """Generates the JSON with the info needed to export a media resource to  CERN-Drupal"""
    # NOTE(review): the string below is a plain no-op statement, not part of
    # the docstring; kept as-is to preserve the block byte-for-byte.
    """Mandatory fields to export: title_en, title_fr, caption_en, caption_fr,
                                   copyright_holder, copyright_date, attribution (image),
                                   keywords (image), directors (video), producer (video)
    """

    # MARC addresses for each exported attribute:
    # (tag, ind1, ind2, subfield code).
    MEDIA_CONFIG = {'title_en':         ('245', ' ', ' ', 'a'),
                    'title_fr':         ('246', ' ', '1', 'a'),
                    'keywords':         ('653', '1', ' ', 'a'),
                    'copyright_holder': ('542', ' ', ' ', 'd'),
                    'copyright_date':   ('542', ' ', ' ', 'g'),
                    'license_url':      ('540', ' ', ' ', 'a'),
                    'license_desc':     ('540', ' ', ' ', 'b'),
                    'license_body':     ('540', ' ', ' ', 'u'),
                    'author':           ('100', ' ', ' ', 'a'),
                    'affiliation':      ('100', ' ', ' ', 'u'),
                    'directors':        ('700', ' ', ' ', 'a'),
                    'video_length':     ('300', ' ', ' ', 'a'),
                    'language':         ('041', ' ', ' ', 'a'),
                    'creation_date':    ('269', ' ', ' ', 'c'),
                    'abstract_en':      ('520', ' ', ' ', 'a'),
                    'abstract_fr':      ('590', ' ', ' ', 'a')}

    entry = {}
    record = get_record(recid)

    # Copy the first value of every configured MARC field into the entry.
    for key in MEDIA_CONFIG:
        entry[key] = record_get_field_value(record, *MEDIA_CONFIG[key])#.encode('utf-8')

    entry['id'] = resource_id
    entry['record_id'] = str(recid)
    entry['type'] = is_image and "image" or "video"
    entry['entry_date'] = get_creation_date(recid)

    # For video assets, try to locate the parent table-of-contents record
    # through the report number in 773__r; used later as a metadata fallback.
    toc_recid = 0
    toc_record = {}
    if not is_image and 'asset' in record_get_field_value(record, *('970', ' ', ' ', 'a')):
        toc_repnum = record_get_field_value(record, *('773', ' ', ' ', 'r'))
        if toc_repnum:
            try:
                toc_recid = search_pattern(p='reportnumber:"%s"' %toc_repnum)[0]
            except IndexError:
                # No record matches the report number; keep toc_recid == 0.
                pass

    #corner cases for copyright & licence: default everything to CERN
    if not entry['copyright_holder']:
        entry['copyright_holder'] = 'CERN'
    if not entry['license_body']:
        entry['license_body'] = 'CERN'
    if not entry['license_desc']:
        entry['license_desc'] = 'CERN'
    if not entry['license_url']:
        from invenio.bibknowledge import get_kb_mapping
        try:
            # Map the license description to its URL via the knowledge base.
            entry['license_url'] = get_kb_mapping(kb_name='LICENSE2URL', key=entry['license_desc'])['value']
        except KeyError:
            pass

    #keywords: all 653_1 a values, comma-joined
    entry['keywords'] = ','.join(record_get_field_values(record, *MEDIA_CONFIG['keywords']))

    #attribution: fall back to the parent TOC record's author/affiliation
    if not entry.get('author', '') and not entry.get('attribution', '') and toc_recid > 0:
        if not toc_record:
            toc_record = get_record(toc_recid)
        entry['author'] = record_get_field_value(toc_record, *MEDIA_CONFIG['author'])
        entry['affiliation'] = record_get_field_value(toc_record, *MEDIA_CONFIG['affiliation'])
        if not entry.get('directors', ''):
            entry['directors'] = ','.join(record_get_field_values(toc_record, *MEDIA_CONFIG['directors']))

    #photos: attribution is "author: affiliation"; directors do not apply
    if is_image:
        if entry['author']:
            entry['attribution'] = entry['author']
        if entry['affiliation']:
            entry['attribution'] += ': %s' % entry['affiliation']
        del entry['directors']
    else: #videos: author doubles as producer; pick a jpg thumbnail
        if entry['author']:
            entry['producer'] = entry['author']
        # Get all files from record
        files_field = ('856', '7', ' ', 'u')
        # Filter all that are images
        thumbnails = [
            image for image in record_get_field_values(record, *files_field)
            if 'jpg' in image
        ]
        # If exists get the first one
        if thumbnails:
            entry['thumbnail'] = thumbnails[0]


    # author/affiliation were only intermediate values; not exported.
    del entry['author']
    del entry['affiliation']

    #
    #title: fall back to the parent TOC record's titles
    if not entry['title_en'] and not entry['title_fr'] and toc_recid > 0:
        if not toc_record:
            toc_record = get_record(toc_recid)
        entry['title_en'] = record_get_field_value(toc_record, *MEDIA_CONFIG['title_en'])
        entry['title_fr'] = record_get_field_value(toc_record, *MEDIA_CONFIG['title_fr'])

    #crop, media storage, caption
    if is_image:
        entry['file_params'] = {'size': ['small', 'medium', 'large'], 'crop': False}

        if 'MediaArchive' in record_get_field_values(record, *('856', '7', ' ', '2')):
            entry['caption_en'] = get_photolab_image_caption(record, tirage)
            entry['caption_fr'] = ''
        else:
            # tirage appears to be a 1-based print/version index into the
            # record's documents (see doc_numbers[tirage-1]) — TODO confirm.
            brd = BibRecDocs(recid, deleted_too=True)
            doc_numbers = [(bibdoc.get_id(), bibdoc) for bibdoc in brd.list_bibdocs()]
            doc_numbers.sort()
            bibdoc = doc_numbers[tirage-1][1]
            entry['filename'] = brd.get_docname(bibdoc.get_id()) #bibdoc.get_docname()
            if 'crop' in [bibdocfile.get_subformat() for bibdocfile in bibdoc.list_latest_files()]:
                entry['file_params']['crop'] = True
            if not bibdoc.deleted_p():
                # Captions from the file comment (en) / description (fr);
                # stop at the first file that carries an English caption.
                for bibdoc_file in bibdoc.list_latest_files():
                    entry['caption_en'] = bibdoc_file.get_comment()
                    entry['caption_fr'] = bibdoc_file.get_description()
                    if entry.get('caption_en', ''):
                        break

    # Abstracts serve as caption fallbacks.
    if not entry.get('caption_en', ''):
        entry['caption_en'] = entry['abstract_en']
    if not entry.get('caption_fr', ''):
        entry['caption_fr'] = entry['abstract_fr']

    if is_image:
        del entry['language']
        del entry['video_length']

    # we don't need it
    del entry['abstract_en']
    del entry['abstract_fr']

    #make sure all mandatory fields are sent
    MANDATORY_FIELDS = ['title_en', 'title_fr', 'caption_en', 'caption_fr', 'copyright_holder', 'copyright_date']
    MANDATORY_FIELDS_IMAGE = MANDATORY_FIELDS + ['attribution', 'keywords']
    MANDATORY_FIELDS_VIDEO = MANDATORY_FIELDS + ['directors', 'producer', 'thumbnail']

    if is_image:
        mandatory_fields_all = MANDATORY_FIELDS_IMAGE
    else:
        mandatory_fields_all = MANDATORY_FIELDS_VIDEO

    for field in mandatory_fields_all:
        entry.setdefault(field, '')
    # In case we want to embed the object
    if wrapped:
        final = {}
        final['entries'] = [{'entry': entry}]

        if not CFG_JSON_AVAILABLE:
            return ''

        if json_format:
            return json.dumps(final)
        else:
            return final
    else:
        return entry
Example #27
0
def csu(req):
    """Stream a CSV of records affiliated with CSU campuses.

    Searches the affiliation index for each campus pattern and writes one
    semicolon-separated row per matching record (id, campus pattern,
    creation/modification dates, title, authors with matching affiliations,
    publication info) as an attachment.
    """
    req.content_type = 'text/csv; charset=utf-8'
    req.headers_out['content-disposition'] = ('attachment; filename=csu_records_info.csv')

    search_patterns = ["California Polytechnic State University",
                       "Carson",
                       "Dominguez Hills",
                       "Fresno",
                       "California State University Fullerton",
                       "California State University Long Beach",
                       "California State University, Los Angeles",
                       "Northridge",
                       "California State University, Sacramento",
                       "San Diego State University",
                       "sfsu"]

    def special_aff(author):
        # Format "name(affiliation, ..., ), " but only when one of the
        # author's 'v' (affiliation) subfields contains the campus pattern
        # currently bound to `value` in the enclosing loop.
        affs = []
        au = ""
        name = ""
        for i in author:
            if i[0] == 'v' and value in i[1]:
                affs.append(i[1])
            if i[0] == 'a':
                name = i[1]
        if len(affs) > 0:
            au = name + '('
            for aff in affs:
                au += aff + ', '
            au += '), '
        return au

    req.write("SCOAP3 record id; Journal; Creation date; Modification date; Title; Authors; Publication info\n")
    for value in search_patterns:
        recids = perform_request_search(p="affiliation:'%s'" % (value,))
        # req.write("%s; %s\n" % (value, len(recids) ))
        for recid in recids:
            rec = get_record(recid)
            if '245' in rec:
                title = rec['245'][0][0][0][1].strip()
            else:
                title = ""
            creation_date = get_creation_date(recid)
            modification_date = get_modification_date(recid)
            authors = ""
            if '100' in rec:
                authors += special_aff(rec['100'][0][0])
            if '700' in rec:
                for author in rec['700']:
                    authors += special_aff(author[0])
            publication_info = ''
            if '773' in rec:
                for p in rec['773'][0][0]:
                    if p[0] == 'p':
                        publication_info = p[1]
                # Fix: guard the DOI (024) lookup — not every record with a
                # 773 field carries an 024, and the unguarded access raised
                # KeyError (the sibling `index` handler guards it).
                if '024' in rec:
                    publication_info += " %s" % (rec['024'][0][0][0][1],)
            if '037' in rec:
                publication_info += " %s" % (rec['037'][0][0][0][1],)

            req.write("%s; %s; %s; %s; %s; %s; %s\n" % (recid,
                                                        value,
                                                        creation_date,
                                                        modification_date,
                                                        title,
                                                        authors,
                                                        publication_info))
def csu(req):
    """Stream a CSV of records affiliated with CSU campuses.

    Searches the affiliation index for each campus pattern and writes one
    semicolon-separated row per matching record as an attachment.
    """
    req.content_type = 'text/csv; charset=utf-8'
    req.headers_out['content-disposition'] = (
        'attachment; filename=csu_records_info.csv')

    search_patterns = [
        "California Polytechnic State University", "Carson", "Dominguez Hills",
        "Fresno", "California State University Fullerton",
        "California State University Long Beach",
        "California State University, Los Angeles", "Northridge",
        "California State University, Sacramento",
        "San Diego State University", "sfsu"
    ]

    def special_aff(author):
        # Format "name(affiliation, ..., ), " but only when one of the
        # author's 'v' (affiliation) subfields contains the campus pattern
        # currently bound to `value` in the enclosing loop.
        affs = []
        au = ""
        name = ""
        for i in author:
            if i[0] == 'v' and value in i[1]:
                affs.append(i[1])
            if i[0] == 'a':
                name = i[1]
        if len(affs) > 0:
            au = name + '('
            for aff in affs:
                au += aff + ', '
            au += '), '
        return au

    req.write(
        "SCOAP3 record id; Journal; Creation date; Modification date; Title; Authors; Publication info\n"
    )
    for value in search_patterns:
        recids = perform_request_search(p="affiliation:'%s'" % (value, ))
        # req.write("%s; %s\n" % (value, len(recids) ))
        for recid in recids:
            rec = get_record(recid)
            if '245' in rec:
                title = rec['245'][0][0][0][1].strip()
            else:
                title = ""
            creation_date = get_creation_date(recid)
            modification_date = get_modification_date(recid)
            authors = ""
            if '100' in rec:
                authors += special_aff(rec['100'][0][0])
            if '700' in rec:
                for author in rec['700']:
                    authors += special_aff(author[0])
            publication_info = ''
            if '773' in rec:
                for p in rec['773'][0][0]:
                    if p[0] == 'p':
                        publication_info = p[1]
                # NOTE(review): rec['024'] is accessed without a
                # `'024' in rec` guard — records with 773 but no 024
                # would raise KeyError here. Confirm and guard.
                publication_info += " %s" % (rec['024'][0][0][0][1], )
            if '037' in rec:
                publication_info += " %s" % (rec['037'][0][0][0][1], )

            req.write("%s; %s; %s; %s; %s; %s; %s\n" %
                      (recid, value, creation_date, modification_date, title,
                       authors, publication_info))
Example #29
0
def write_csv(req, dictionary, journal_list, f_date, t_date,
              created_or_modified_date):
    """Build a semicolon-separated compliance report for the given journals.

    For each journal key in `journal_list` (mapped to a collection via
    `dictionary`), lists records created/modified between `f_date` and
    `t_date` with their formats, compliance flags and package-delivery
    timings pulled from the doi/package tables.  Returns the whole report
    as one string; `req` is unused but kept for the handler signature.
    """
    return_val = ''

    for key in journal_list:
        val = dictionary[key]
        papers = perform_request_search(p="date%s:%s->%s"
                                        % (created_or_modified_date,
                                           f_date, t_date),
                                        c=val)

        # Skip journals with no hits in the requested date range.
        if not papers:
            continue

        # Journal name on its own line, then the column header row.
        # (Fix: the newline after `key` was missing, gluing the journal
        # name and the header onto a single line.)
        return_val += key + '\n'
        return_val += ';'.join(['recid', 'cr. date', 'mod. date', 'DOI',
                                'XML', 'PDF', 'PDF/A', 'Complete record?',
                                'arXiv number', 'Copyright: authors', 'CC-BY',
                                'Funded by SCOAP3', 'notes', 'First delivery',
                                'First AB delivery', 'Last modification',
                                'PDF/A upload', 'DOI registration',
                                'Delivery diff', 'PDF/A diff']) + '\n'

        for recid in papers:
            rec = get_record(recid)
            doi = get_doi(rec)
            first_del = None
            first_ab_del = None
            last_mod = None
            doi_reg = None
            pdfa_del = None
            # Package deliveries for this DOI, oldest first (parameterized
            # query — the DOI is never interpolated into the SQL string).
            delivery_data = run_sql("SELECT doi.creation_date AS 'doi_reg', package.name AS 'pkg_name', package.delivery_date AS 'pkg_delivery' FROM doi_package LEFT JOIN doi ON doi_package.doi=doi.doi LEFT JOIN package ON package.id=doi_package.package_id WHERE doi_package.doi=%s ORDER BY package.delivery_date ASC", (doi,), with_dict=True)
            if delivery_data:
                first_del = delivery_data[0]['pkg_delivery']
                first_ab_del = get_delivery_of_firts_ab_package(delivery_data)
                last_mod = delivery_data[-1]['pkg_delivery']
                doi_reg = delivery_data[0]['doi_reg']
                pdfa_del = get_delivery_of_firts_pdfa(delivery_data)

            record_compl = is_complete_record(recid)
            # NOTE: lstrip/rstrip take a character *set*, not a prefix; this
            # only works because the helpers wrap values in literal <b>...</b>.
            return_val += ';'.join(str(item) for item in [str(recid),
                                   get_creation_date(recid),
                                   get_modification_date(recid),
                                   doi,
                                   has_or_had_format(recid, '.xml').lstrip('<b>').rstrip('</b>'),
                                   has_or_had_format(recid, '.pdf').lstrip('<b>').rstrip('</b>'),
                                   has_or_had_format(recid, '.pdf;pdfa').lstrip('<b>').rstrip('</b>'),
                                   str(check_complete_rec(record_compl)),
                                   get_arxiv(rec).lstrip('<b>').rstrip('</b>'),
                                   is_compliant(recid, 'authors').lstrip('<b>').rstrip('</b>'),
                                   is_compliant(recid, 'cc').lstrip('<b>').rstrip('</b>'),
                                   is_compliant(recid, 'scoap3').lstrip('<b>').rstrip('</b>'),
                                   is_compliant(recid, 'category').lstrip('<b>').rstrip('</b>'),
                                   str([rec_key for rec_key, rec_val in record_compl.iteritems() if not rec_val]),
                                   str(first_del),
                                   str(first_ab_del),
                                   str(last_mod),
                                   str(pdfa_del),
                                   str(doi_reg),
                                   check_24h_delivery(first_ab_del, doi_reg),
                                   check_24h_delivery(pdfa_del, doi_reg)
                                   ])
            return_val += '\n'

    return return_val
Example #30
0
def format_element(bfo):
    """
    Displays the description of how users should cite
    any content of the archive. The citation includes:
    For blogs: "title".
    (record_creation_date). record_url
    Retrieved from the original "original_url"
    For blog posts: author. "title". Blog: "blog_title".
    (record_creation_date). record_url
    Retrieved from the original "original_url"
    For comments: author. Blog post: "post_title".
    (record_creation_date). record_url
    Retrieved from the original "original_url"
    """

    coll = bfo.fields('980__a')[0]
    recid = bfo.control_field('001')

    # let's get the fields we want to show
    if coll in ["BLOGPOST", "COMMENT"]:
        author = bfo.fields('100__a')[0]
        try:
            # NOTE(review): original_creation_date is never used below;
            # kept for parity with the original element.
            original_creation_date = bfo.fields('269__c')[0]
        except IndexError:
            original_creation_date = ""

    try:
        title = bfo.fields('245__a')[0]
    except IndexError:
        title = "Untitled"

    try:
        original_url = bfo.fields('520__u')[0]
    except IndexError:
        raise Exception("URL not found")

    # creation date of a record
    record_creation_date = get_creation_date(recid)
    # url in the archive
    record_url = CFG_SITE_URL + "/record/" + recid

    if coll == "BLOGPOST":
        # we will also show the blog's title of
        # the corresponding blog post
        blog_recid = get_parent_blog(recid)
        blog_bfo = BibFormatObject(blog_recid)
        try:
            blog_title = blog_bfo.fields('245__a')[0]
        except IndexError:
            blog_title = 'Untitled'

        description = """<table style="border:1px solid black;"><tr><td>\
        <span><b>%s</b>. '%s'. Blog: '%s'. </br> \
        (%s). <i>'%s'</i> </br> \
        Retrieved from the original <i>'%s'</i><span></td></tr></table>""" \
        % (author, title, blog_title, record_creation_date, record_url, original_url)

    elif coll == "COMMENT":
        # we will also show the post's title of
        # the corresponding comment
        post_recid = get_parent_post(recid)
        post_bfo = BibFormatObject(post_recid)
        try:
            post_title = post_bfo.fields('245__a')[0]
        except IndexError:
            post_title = 'Untitled'

        description = """<table style="border:1px solid black;"><tr><td>\
        <span><b>%s. </b>Blog post: '%s'.</br> \
        (%s). <i>'%s'</i> </br> \
        Retrieved from the original <i>'%s'</i><span></td></tr></table>""" \
        % (author, post_title, record_creation_date, record_url, original_url)

    else: # coll == "BLOG"
        description = """<table style="border:1px solid black;"><tr><td>\
        <span>'%s' </br> \
        (%s). <i>'%s'</i> </br> \
        Retrieved from the original <i>'%s'</i><span></td></tr></table>""" \
        % (title, record_creation_date, record_url, original_url)

    out = """
        <script type="text/javascript">
        function displayCitationDescription(){
            var description = document.getElementById('description');
            var citation_link = document.getElementById('citation_link');
            if (description.style.display == 'none'){
                description.style.display = '';
                citation_link.innerHTML = "Hide citation description"
            } else {
                description.style.display = 'none';
                citation_link.innerHTML = "How to cite this"
            }
        }
        </script>
        """

    out += '<span id="description" style="">' + description + '</span>'
    # Fix: the onclick attribute previously ended with a doubled quote
    # ('()""'), emitting malformed HTML.
    out += '<a class="moreinfo" id="citation_link" \
            href="javascript:void(0)" onclick="displayCitationDescription()"></a>'
    out += '<script type="text/javascript">displayCitationDescription()</script>'

    return out
Example #31
0
def get_record_checks(req, recids):
    """Render HTML table rows with compliance checks for a list of records.

    `recids` is a comma-separated string of record ids.  For each id one
    <tr> is produced; when processing a record fails, a header row pair is
    emitted instead (the column legend), so the page still renders.
    Returns the concatenated rows as one string; `req` is unused but kept
    for the handler signature.

    Fix: `is_compliant(recid. "category")` used a period instead of a
    comma, which is a SyntaxError.
    """
    if recids == '':
        return ''

    recids = recids.split(',')
    return_val = []
    for rid in recids:
        try:
            recid = int(rid)
            rec = get_record(recid)
            doi = get_doi(rec)
            # Package deliveries for this DOI, oldest first (parameterized
            # query — the DOI is never interpolated into the SQL string).
            delivery_data = run_sql("SELECT doi.creation_date AS 'doi_reg', package.name AS 'pkg_name', package.delivery_date AS 'pkg_delivery' FROM doi_package LEFT JOIN doi ON doi_package.doi=doi.doi LEFT JOIN package ON package.id=doi_package.package_id WHERE doi_package.doi=%s ORDER BY package.delivery_date ASC",
                                    (doi,),
                                    with_dict=True)
            first_del = None
            first_ab_del = None
            last_mod = None
            doi_reg = None
            pdfa_del = None
            if delivery_data:
                first_del = delivery_data[0]['pkg_delivery']
                first_ab_del = get_delivery_of_firts_ab_package(delivery_data)
                last_mod = delivery_data[-1]['pkg_delivery']
                doi_reg = delivery_data[0]['doi_reg']
                pdfa_del = get_delivery_of_firts_pdfa(delivery_data)
            record_compl = is_complete_record(recid)
            return_val.append("""<tr>
                <td><a href="%s">%i</a></td>
                <td>%s</td>
                <td>%s</td>
                <td><a href="http://dx.doi.org/%s">%s</a></td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td %s>%s</td>
                <td %s>%s</td>
            </tr>""" % (join(CFG_SITE_URL, 'record', str(recid)), recid,
                        get_creation_date(recid),
                        get_modification_date(recid),
                        doi, doi,
                        has_or_had_format(recid, '.xml'),
                        has_or_had_format(recid, '.pdf'),
                        has_or_had_format(recid, '.pdf;pdfa'),
                        check_complete_rec(record_compl),
                        get_arxiv(rec),
                        is_compliant(recid, "authors"),
                        is_compliant(recid, "cc"),
                        is_compliant(recid, "scoap3"),
                        is_compliant(recid, "category"),
                        str([rec_key for rec_key, rec_val
                             in record_compl.iteritems() if not rec_val]),
                        str(first_del),
                        str(first_ab_del),
                        str(last_mod),
                        str(pdfa_del),
                        str(doi_reg),
                        format_24h_delivery(check_24h_delivery(first_del, doi_reg)),
                        check_24h_delivery(first_del, doi_reg),
                        format_24h_delivery(check_24h_delivery(pdfa_del, doi_reg)),
                        check_24h_delivery(pdfa_del, doi_reg)))
        except Exception:
            register_exception()
            recid = rid
            # NOTE(review): colspan="13" does not match the 21 columns
            # below; left as in the original.
            return_val.append("""<tr><th colspan="13" align="left">
                               <h2>%s</h2></th></tr>""" % (recid,))
            return_val.append("""<tr>
                <th>recid</th>
                <th>cr. date</th>
                <th>mod. date</th>
                <th>DOI</th>
                <th>XML</th>
                <th>PDF</th>
                <th>PDF/A</th>
                <th>Complete record?</th>
                <th>arXiv number</th>
                <th>Copyright: authors</th>
                <th>CC-BY</th>
                <th>Funded by SCOAP3</th>
                <th>Category</th>
                <th>notes</th>
                <th>First delivery</th>
                <th>First AB delivery</th>
                <th>Last modification</th>
                <th>PDF/A upload</th>
                <th>DOI registration</th>
                <th>Delivery diff</th>
                <th>PDF/A diff</th>
            </tr>""")
    return ''.join(return_val)
Example #32
0
def get_record_checks(req, recids):
    """Render HTML table rows with compliance checks for a list of records.

    `recids` is a comma-separated string of record ids.  For each id one
    <tr> with 23 cells is produced; when processing a record fails, a
    header row pair (record id + column legend) is emitted instead so the
    page still renders.  Returns the concatenated rows as one string;
    `req` is unused but kept for the handler signature.
    """
    if recids == '':
        return ''

    recids = recids.split(',')
    return_val = []
    for rid in recids:
        try:
            recid = int(rid)
            rec = get_record(recid)
            doi = get_doi(rec)

            # Delivery/registration timestamps; any slot may be None.
            first_del, first_ab_del, last_mod, doi_reg, pdfa_del, pub_date = get_delivery_data(recid, doi)

            record_compl = is_complete_record(recid)
            # The %-argument order below must match the <td> order exactly.
            return_val.append("""<tr>
                <td><a href="%s">%i</a></td>
                <td>%s</td>
                <td>%s</td>
                <td><a href="http://dx.doi.org/%s">%s</a></td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td %s>%s</td>
                <td %s>%s</td>
                <td>%s</td>
                <td %s>%s</td>
            </tr>""" % (join(CFG_SITE_URL, 'record', str(recid)), recid,
                        get_creation_date(recid),
                        get_modification_date(recid),
                        doi, doi,
                        has_or_had_format(recid, '.xml'),
                        has_or_had_format(recid, '.pdf'),
                        has_or_had_format(recid, '.pdf;pdfa'),
                        check_complete_rec(record_compl),
                        get_arxiv(rec),
                        is_compliant(recid, "authors"),
                        is_compliant(recid, "cc"),
                        is_compliant(recid, "scoap3"),
                        is_compliant(recid, "category"),
                        str([rec_key for rec_key, rec_val
                             in record_compl.iteritems() if not rec_val]),
                        str(first_del),
                        str(first_ab_del),
                        str(last_mod),
                        str(pdfa_del),
                        str(doi_reg),
                        format_24h_delivery(check_24h_delivery(first_del, doi_reg)),
                        check_24h_delivery(first_del, doi_reg),
                        format_24h_delivery(check_24h_delivery(pdfa_del, doi_reg)),
                        check_24h_delivery(pdfa_del, doi_reg),
                        str(pub_date),
                        format_24h_delivery(check_24h_delivery(first_del, pub_date)),
                        check_24h_delivery(first_del, pub_date)))
        except Exception:
            # Any failure (bad id, missing record, ...) is logged and the
            # column legend is emitted in place of the data row.
            register_exception()
            recid = rid
            # NOTE(review): colspan="13" does not match the 23 columns
            # below; left as in the original.
            return_val.append("""<tr><th colspan="13" align="left">
                               <h2>%s</h2></th></tr>""" % (recid,))
            return_val.append("""<tr>
                <th>recid</th>
                <th>cr. date</th>
                <th>mod. date</th>
                <th>DOI</th>
                <th>XML</th>
                <th>PDF</th>
                <th>PDF/A</th>
                <th>Complete record?</th>
                <th>arXiv number</th>
                <th>Copyright: authors</th>
                <th>CC-BY</th>
                <th>Funded by SCOAP3</th>
                <th>arXiv category</th>
                <th>notes</th>
                <th>First delivery</th>
                <th>First AB delivery</th>
                <th>Last modification</th>
                <th>PDF/A upload</th>
                <th>DOI registration</th>
                <th>Delivery diff</th>
                <th>PDF/A diff</th>
        <th>Publication online</th>
        <th>Pub. online diff</th>
            </tr>""")
    return ''.join(return_val)
Example #33
0
def get_record_checks(req, recids):
    """Render one HTML compliance-report row per requested record.

    @param req: request object of the web handler (unused here; kept for
        the calling convention)
    @param recids: comma-separated string of record IDs, e.g. "12,34";
        an empty string yields an empty result
    @return: concatenated HTML ``<tr>`` markup, one row per record.  If
        processing a record fails, the exception is logged and an error
        banner plus a fresh column-header row are emitted instead, and
        processing continues with the next record.
    """
    if recids == '':
        return ''

    recids = recids.split(',')
    return_val = []
    for rid in recids:
        try:
            recid = int(rid)
            rec = get_record(recid)
            doi = get_doi(rec)

            # Delivery/registration timestamps feeding the 24h-delivery checks.
            first_del, first_ab_del, last_mod, doi_reg, pdfa_del, pub_date = get_delivery_data(
                recid, doi)

            record_compl = is_complete_record(recid)
            return_val.append(
                """<tr>
                <td><a href="%s">%i</a></td>
                <td>%s</td>
                <td>%s</td>
                <td><a href="http://dx.doi.org/%s">%s</a></td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td %s>%s</td>
                <td %s>%s</td>
                <td>%s</td>
                <td %s>%s</td>
            </tr>""" %
                (join(CFG_SITE_URL, 'record', str(recid)), recid,
                 get_creation_date(recid), get_modification_date(recid), doi,
                 doi, has_or_had_format(
                     recid, '.xml'), has_or_had_format(
                         recid, '.pdf'), has_or_had_format(recid, '.pdf;pdfa'),
                 check_complete_rec(record_compl), get_arxiv(rec),
                 is_compliant(recid, "authors"), is_compliant(
                     recid, "cc"), is_compliant(
                         recid, "scoap3"), is_compliant(recid, "category"),
                 str([
                     rec_key for rec_key, rec_val in record_compl.iteritems()
                     if not rec_val
                 ]), str(first_del), str(first_ab_del), str(last_mod),
                 str(pdfa_del), str(doi_reg),
                 format_24h_delivery(check_24h_delivery(first_del, doi_reg)),
                 check_24h_delivery(first_del, doi_reg),
                 format_24h_delivery(check_24h_delivery(pdfa_del, doi_reg)),
                 check_24h_delivery(pdfa_del, doi_reg), str(pub_date),
                 format_24h_delivery(check_24h_delivery(first_del, pub_date)),
                 check_24h_delivery(first_del, pub_date)))
        except Exception:
            # Log the failure and keep going with the remaining records.
            register_exception()
            recid = rid
            # BUGFIX: the banner must span all 23 columns of this table
            # (the stale colspan="13" predates the extra columns; the
            # header row emitted just below has 23 <th> cells).
            return_val.append("""<tr><th colspan="23" align="left">
                               <h2>%s</h2></th></tr>""" % (recid, ))
            # Re-emit the column headers so the rows after the failed
            # record remain readable.
            return_val.append("""<tr>
                <th>recid</th>
                <th>cr. date</th>
                <th>mod. date</th>
                <th>DOI</th>
                <th>XML</th>
                <th>PDF</th>
                <th>PDF/A</th>
                <th>Complete record?</th>
                <th>arXiv number</th>
                <th>Copyright: authors</th>
                <th>CC-BY</th>
                <th>Funded by SCOAP3</th>
                <th>arXiv category</th>
                <th>notes</th>
                <th>First delivery</th>
                <th>First AB delivery</th>
                <th>Last modification</th>
                <th>PDF/A upload</th>
                <th>DOI registration</th>
                <th>Delivery diff</th>
                <th>PDF/A diff</th>
        <th>Publication online</th>
        <th>Pub. online diff</th>
            </tr>""")
    return ''.join(return_val)
    def setUp(self):
        """
        webalert - prepare test alerts
        """
        from invenio import alert_engine
        from invenio.search_engine import get_creation_date

        # TODO: test alerts for baskets too
        self.added_query_ids = []

        # SQL templates: %(id_query)s is filled by string interpolation,
        # the %s placeholders by run_sql parameter binding.
        q_query = """INSERT INTO query (type, urlargs) VALUES (%s,%s)"""
        q_user_query = """INSERT INTO user_query (id_user, id_query, date) VALUES (%%s,%(id_query)s,NOW())"""
        q_user_query_basket = """INSERT INTO user_query_basket (id_user, id_query, id_basket,
                                             frequency, date_creation,
                                             alert_name, notification, alert_recipient)
                                             VALUES (%%s,%(id_query)s,%%s,%%s,DATE(NOW()),%%s,%%s,%%s)"""

        parameters = {
            'romeo 1': {'query_params': ('r', 'c=Theses&c=Poetry',),
                        'user_query_params': (5,),
                        'user_query_basket_params': (5, 0, 'day', 'Romeo alert 1', 'y', '',)},
            'juliet 1': {'query_params': ('r', 'c=Theses&c=Poetry',),
                         'user_query_params': (6,),
                         'user_query_basket_params': (6, 0, 'day', 'Juliet alert 1', 'y', '',)},
            'mailing list 1': {'query_params': ('r', 'c=Theses&c=Poetry',),
                               'user_query_params': (6,),
                               'user_query_basket_params': (6, 0, 'day', 'Mailing list alert 1', 'y', CFG_SITE_ADMIN_EMAIL,)},
            'juliet 2': {'query_params': ('r', 'c=Theses',),
                         'user_query_params': (6,),
                         'user_query_basket_params': (6, 0, 'day', 'Juliet alert 2', 'y', '',)},
        }

        # Register every test alert in the database.
        for spec in parameters.values():
            new_id = run_sql(q_query, spec['query_params'])
            self.added_query_ids.append(new_id)
            run_sql(q_user_query % {'id_query': new_id},
                    spec['user_query_params'])
            run_sql(q_user_query_basket % {'id_query': new_id},
                    spec['user_query_basket_params'])

        # Run the alerts for a date when results are expected: the day
        # after the creation date of demo record 41.
        created = get_creation_date(41, fmt="%Y-%m-%d")
        parsed = time.strptime(created, '%Y-%m-%d')
        alert_date = datetime.datetime(*parsed[:6]).date() \
            + datetime.timedelta(days=1)

        # Prevent emails from being sent and raise verbosity.
        previous_cfg_webalert_debug_level = alert_engine.CFG_WEBALERT_DEBUG_LEVEL
        alert_engine.CFG_WEBALERT_DEBUG_LEVEL = 3
        # Capture standard output so the engine's report can be analysed.
        previous_stdout = sys.stdout
        sys.stdout = alert_output = StringIO()
        alert_engine.run_alerts(date=alert_date)
        # Restore standard output and the alert debug level.
        sys.stdout = previous_stdout
        alert_engine.CFG_WEBALERT_DEBUG_LEVEL = previous_cfg_webalert_debug_level

        # Remove the test alerts again.
        for query_id in self.added_query_ids:
            run_sql('DELETE FROM user_query_basket WHERE id_query=%s',
                    (query_id,))
            run_sql('DELETE FROM user_query WHERE id_query=%s', (query_id,))
            run_sql('DELETE FROM query WHERE id=%s', (query_id,))

        # Index each captured alert message by its (hopefully unique)
        # alert name for the tests that follow.
        self.alerts = {}
        separator = "+" * 80 + '\n'
        for message in alert_output.getvalue().split(separator):
            if 'alert name: ' in message:
                name = message.split('alert name: ')[1].split('\n')[0]
                self.alerts[name] = message
    def setUp(self):
        """
        webalert - prepare test alerts
        """
        # TODO: test alerts for baskets too
        self.added_query_ids = []

        # SQL templates; %(id_query)s is interpolated later, the plain
        # %s slots are bound by run_sql.
        q_query = """INSERT INTO query (type, urlargs) VALUES (%s,%s)"""
        q_user_query = """INSERT INTO user_query (id_user, id_query, date) VALUES (%%s,%(id_query)s,NOW())"""
        q_user_query_basket = """INSERT INTO user_query_basket (id_user, id_query, id_basket,
                                             frequency, date_creation,
                                             alert_name, notification, alert_recipient)
                                             VALUES (%%s,%(id_query)s,%%s,%%s,DATE(NOW()),%%s,%%s,%%s)"""

        # One entry per test alert to create.
        parameters = {}
        parameters['romeo 1'] = {
            'query_params': ('r', 'c=Theses&c=Poetry'),
            'user_query_params': (5,),
            'user_query_basket_params': (5, 0, 'day', 'Romeo alert 1', 'y', ''),
        }
        parameters['juliet 1'] = {
            'query_params': ('r', 'c=Theses&c=Poetry'),
            'user_query_params': (6,),
            'user_query_basket_params': (6, 0, 'day', 'Juliet alert 1', 'y', ''),
        }
        parameters['mailing list 1'] = {
            'query_params': ('r', 'c=Theses&c=Poetry'),
            'user_query_params': (6,),
            'user_query_basket_params': (6, 0, 'day', 'Mailing list alert 1', 'y', CFG_SITE_ADMIN_EMAIL),
        }
        parameters['juliet 2'] = {
            'query_params': ('r', 'c=Theses'),
            'user_query_params': (6,),
            'user_query_basket_params': (6, 0, 'day', 'Juliet alert 2', 'y', ''),
        }

        # Insert the alerts into the database.
        for alert_spec in parameters.values():
            qid = run_sql(q_query, alert_spec['query_params'])
            self.added_query_ids.append(qid)
            run_sql(q_user_query % {'id_query': qid},
                    alert_spec['user_query_params'])
            run_sql(q_user_query_basket % {'id_query': qid},
                    alert_spec['user_query_basket_params'])

        # Run the alerts the day after demo record 41 was created, so
        # some results are expected.
        demo_created = get_creation_date(41, fmt="%Y-%m-%d")
        alert_date = datetime.datetime(
            *(time.strptime(demo_created, '%Y-%m-%d')[:6])
        ).date() + datetime.timedelta(days=1)

        # Prevent emails from being sent, raise verbosity, and capture
        # the engine's standard output for later analysis.
        previous_cfg_webalert_debug_level = alert_engine.CFG_WEBALERT_DEBUG_LEVEL
        alert_engine.CFG_WEBALERT_DEBUG_LEVEL = 3
        previous_stdout = sys.stdout
        sys.stdout = alert_output = StringIO()
        alert_engine.run_alerts(date=alert_date)
        sys.stdout = previous_stdout
        alert_engine.CFG_WEBALERT_DEBUG_LEVEL = previous_cfg_webalert_debug_level

        # Clean the test alerts out of the database again.
        for query_id in self.added_query_ids:
            run_sql('DELETE FROM user_query_basket WHERE id_query=%s',
                    (query_id,))
            run_sql('DELETE FROM user_query WHERE id_query=%s', (query_id,))
            run_sql('DELETE FROM query WHERE id=%s', (query_id,))

        # Organize the captured alert messages by alert name (hopefully
        # unique for these tests).
        self.alerts = {}
        for chunk in alert_output.getvalue().split("+" * 80 + '\n'):
            if 'alert name: ' not in chunk:
                continue
            name = chunk.split('alert name: ')[1].split('\n')[0]
            self.alerts[name] = chunk