def bst_openaire_altmetric():
    """
    """
    recids = search_pattern(p="0->Z", f="0247_a")
    a = Altmetric()

    for recid in recids:
        try:
            # Check if we already have an Altmetric id
            sysno_inst = get_fieldvalues(recid, "035__9")
            if 'Altmetric' in sysno_inst:
                continue

            doi_val = get_fieldvalues(recid, "0247_a")[0]
            json_res = a.doi(doi_val)

            rec = {}
            record_add_field(rec, "001", controlfield_value=str(recid))

            if json_res:
                record_add_field(rec, '035', subfields=[
                    ('a', str(json_res['altmetric_id'])), ('9', 'Altmetric')]
                )
                bibupload(rec, opt_mode='correct')
        except AltmetricHTTPException as e:
            register_exception(prefix='Altmetric error (status code %s): %s' %
                              (e.status_code, str(e)), alert_admin=False)
Example #2
def render_dataverse_dataset_html(recid, display_link=True):
    """ Rendering a single Dataverse dataset, both for the tab and the record
    @param display_link Indicates if a link to the data record should be displayed
    @type display_link boolean
    """
    from invenio.legacy.search_engine import get_fieldvalues

    # rendering the HTML code

    c = []  # collecting parts of the output
    c.append('<div style="background-color: #ececec; padding:10px;">')

    comments = get_fieldvalues(recid, "520__h")[0]
    publisher = get_fieldvalues(recid, "520__9")

    c.append("<br />")
    c.append("<b>Description: </b> " + comments + "<br />")
    c.append("<br />")

    link_txt = "Go to the record"
    if display_link:
        c.append('<a href="%s/record/%s">%s</a>' % (CFG_SITE_URL, str(recid), link_txt))

    c.append("<br /><br />")
    if publisher[0] == "Dataverse" and display_link == False:
        c.append('<div class="hepdataTablePlaceholder">')
        c.append('<table cellpadding="0" cellspacing="0" class="hepdataTable">')
        c.append('<tr><td style="text-align: center;">Preview not available</td>')
        c.append("</tr>")
        c.append("</table>")
        c.append("</div>")
        c.append("<br /><br />")

    c.append("</div>")
    return "\n".join(c)
Example #5
def get_recid_and_reportnumber(recid=None,
                               reportnumber=None,
                               keep_original_reportnumber=True):
    """
    Given at least a recid or a reportnumber, this function will look into
    the system for the matching record and will return a normalized
    recid and the primary reportnumber.
    @raises ValueError: in case of no record matched.
    """
    if recid:
        ## Recid specified receives priority.
        recid = int(recid)
        values = get_fieldvalues(recid, CFG_PRIMARY_REPORTNUMBER)
        if values:
            ## Let's take whatever reportnumber is stored in the matching record
            reportnumber = values[0]
            return recid, reportnumber
        else:
            raise ValueError(
                "The record %s does not have a primary report number" % recid)
    elif reportnumber:
        ## OK, reportnumber specified: try first with the primary
        ## reportnumber, then with any other reportnumber
        recids = search_pattern(p='%s:"%s"' %
                                (CFG_PRIMARY_REPORTNUMBER, reportnumber))
        if not recids:
            ## Not found as primary
            recids = search_pattern(p='reportnumber:"%s"' % reportnumber)
        if len(recids) > 1:
            raise ValueError(
                'More than one record matches the reportnumber "%s": %s' %
                (reportnumber, ', '.join([str(i) for i in recids])))
        elif len(recids) == 1:
            recid = list(recids)[0]
            if keep_original_reportnumber:
                return recid, reportnumber
            else:
                reportnumbers = get_fieldvalues(recid,
                                                CFG_PRIMARY_REPORTNUMBER)
                if not reportnumbers:
                    raise ValueError(
                        "The matched record %s does not have a primary report number"
                        % recid)
                return recid, reportnumbers[0]
        else:
            raise ValueError(
                "No records are matched by the provided reportnumber: %s" %
                reportnumber)
    raise ValueError(
        "At least the recid or the reportnumber must be specified")
Example #6
def openaire_altmetric_update(recids, upload=True):
    """
    Retrieve Altmetric information for a record.
    """
    logger.debug("Checking Altmetric for recids %s" % recids)
    a = Altmetric()

    records = []
    for recid in recids:
        logger.debug("Checking Altmetric for recid %s" % recid)
        try:
            # Check if we already have an Altmetric id
            sysno_inst = get_fieldvalues(recid, "035__9")
            if 'Altmetric' in sysno_inst:
                continue

            doi_val = get_fieldvalues(recid, "0247_a")[0]
            logger.debug("Found DOI %s" % doi_val)
            json_res = a.doi(doi_val)
            logger.debug("Altmetric response: %s" % json_res)

            rec = {}
            record_add_field(rec, "001", controlfield_value=str(recid))

            if json_res:
                record_add_field(rec,
                                 '035',
                                 subfields=[('a',
                                             str(json_res['altmetric_id'])),
                                            ('9', 'Altmetric')])
                records.append(rec)
        except AltmetricHTTPException as e:
            logger.warning(
                'Altmetric error for recid %s with DOI %s (status code %s): %s'
                % (recid, doi_val, e.status_code, str(e)))
            register_exception(prefix='Altmetric error (status code %s): %s' %
                               (e.status_code, str(e)),
                               alert_admin=False)
        except IndexError:
            logger.debug("No DOI found")

    if upload and records:
        if len(records) == 1:
            bibupload(record=records[0], file_prefix="altmetric")
        else:
            bibupload(collection=records, file_prefix="altmetric")

    return records
Example #7
def get_tweets(query):
    """
    This is how simple it is to fetch tweets :-)
    """
    ## We shall skip tweets that are already in the system.
    previous_tweets = perform_request_search(p='980__a:"TWEET" 980__b:"%s"' %
                                             query,
                                             sf='970__a',
                                             so='a')
    if previous_tweets:
        ## A bit of an algorithm to retrieve the last Tweet ID that was stored
        ## in our records
        since_id = int(get_fieldvalues(previous_tweets[0], '970__a')[0])
    else:
        since_id = 0
    final_results = []
    results = list(
        _TWITTER_API.Search(query, rpp=100, since_id=since_id).results)
    final_results.extend(results)
    page = 1
    while len(results) == 100:  ## We stop if there are less than 100 results per page
        page += 1
        results = list(
            _TWITTER_API.Search(query, rpp=100, since_id=since_id,
                                page=page).results)
        final_results.extend(results)
    return final_results
Example #8
def curate():
    """Index page with uploader and list of existing depositions."""
    from invenio.legacy.search_engine import get_fieldvalues
    action = request.values.get('action')
    community_id = request.values.get('collection')
    recid = request.values.get('recid', 0, type=int)
    # Allowed actions
    if action not in ['accept', 'reject', 'remove']:
        abort(400)

    # Check recid
    if not recid:
        abort(400)
    recid = int(recid)

    # Does the community exist?
    u = Community.query.filter_by(id=community_id).first()
    if not u:
        abort(400)
    # Check permission to perform action on this record
    # - Accept and reject is done by community owner
    # - Remove  is done by record owner
    if action in ['accept', 'reject']:
        if u.id_user != current_user.get_id():
            abort(403)
    elif action == 'remove':
        try:
            email = get_fieldvalues(recid, '8560_f')[0]
            if email != current_user['email']:
                abort(403)
            # inform interested parties of removing collection/community
            curate_record.send(u,
                               action=action,
                               recid=recid,
                               user=current_user)
        except (IndexError, KeyError):
            abort(403)

    # Prevent double requests (i.e. give bibupload a chance to make the change)
    key = "community_curate:%s_%s" % (community_id, recid)
    cache_action = cache.get(key)
    if cache_action == action or cache_action in ['reject', 'remove']:
        return jsonify({'status': 'success', 'cache': 1})
    elif cache_action:
        # Operation under way, but with a different action
        return jsonify({'status': 'failure', 'cache': 1})

    if action == "accept":
        res = u.accept_record(recid)
    elif action == "reject" or action == "remove":
        res = u.reject_record(recid)
    if res:
        # Set 5 min cache to allow bibupload/webcoll to finish
        cache.set(key, action, timeout=5 * 60)
        return jsonify({'status': 'success', 'cache': 0})
    else:
        return jsonify({'status': 'failure', 'cache': 0})
Example #9
def render_other_dataset_html(recid, display_link=True):
    """ Try to render the basic content of an unknown dataset, both for the tab and the record
    @param display_link Indicates if a link to the data record should be displayed
    @type display_link boolean
    """
    from invenio.legacy.search_engine import get_fieldvalues

    c = []  # collecting parts of the output
    c.append("<div style=\"background-color: #ececec; padding:10px;\">")

    comments = get_fieldvalues(recid, '520__h')
    comments = comments[0] if comments else ""

    c.append("<br />")
    c.append("<b>Description: </b> " + comments + "<br />")
    c.append("<br />")

    link_txt = "Go to the record"
    if display_link:
        c.append("<a href=\"%s/record/%s\">%s</a>" % (CFG_SITE_URL, str(recid), link_txt))

    c.append("<br /><br />")
    c.append("</div>")
    return "\n".join(c)
Example #10
def openaire_register_doi(recid):
    """
    Register a DOI for a new publication

    If it fails, it will retry every 10 minutes for 1 hour.
    """
    doi_val = get_fieldvalues(recid, "0247_a")[0]
    logger.debug("Found DOI %s in record %s" % (doi_val, recid))

    pid = PersistentIdentifier.get("doi", doi_val)
    if not pid:
        logger.debug("DOI not locally managed.")
        return
    else:
        logger.debug("DOI locally managed.")

    if not pid.has_object("rec", recid):
        raise Exception(
            "DOI %s is not assigned to record %s." % (doi_val, recid))

    if pid.is_new() or pid.is_reserved():
        logger.info("Registering DOI %s for record %s" % (doi_val, recid))

        url = "%s/record/%s" % (CFG_DATACITE_SITE_URL, recid)
        doc = format_record(recid, DEPOSIT_DATACITE_OF)

        if not pid.register(url=url, doc=doc):
            m = "Failed to register DOI %s" % doi_val
            logger.error(m + "\n%s\n%s" % (url, doc))
            if not openaire_register_doi.request.is_eager:
                raise openaire_register_doi.retry(exc=Exception(m))
        else:
            logger.info("Successfully registered DOI %s." % doi_val)
Example #13
def openaire_delete_doi(recid):
    """
    Delete DOI in DataCite

    If it fails, it will retry every 10 minutes for 1 hour.
    """
    doi_val = get_fieldvalues(recid, "0247_a")[0]
    logger.debug("Found DOI %s in record %s" % (doi_val, recid))

    pid = PersistentIdentifier.get("doi", doi_val)
    if not pid:
        logger.debug("DOI not locally managed.")
        return
    else:
        logger.debug("DOI locally managed.")

    if not pid.has_object("rec", recid):
        raise Exception(
            "DOI %s is not assigned to record %s." % (doi_val, recid))

    if pid.is_registered():
        logger.info("Inactivating DOI %s for record %s" % (doi_val, recid))

        if not pid.delete():
            m = "Failed to inactive DOI %s" % doi_val
            logger.error(m)
            if not openaire_delete_doi.request.is_eager:
                raise openaire_delete_doi.retry(exc=Exception(m))
        else:
            logger.info("Successfully inactivated DOI %s." % doi_val)
Example #16
def format_element(bfo):
    """ Prints the control number of an author authority record in HTML.
    By default prints brief version.

    @param brief: whether the 'brief' rather than the 'detailed' format
    @type brief: 'yes' or 'no'
    """

    from invenio.messages import gettext_set_language
    _ = gettext_set_language(bfo.lang)    # load the right message language


    control_nos = [d['a'] for d in bfo.fields('035__') if d['a']]
    previous_recIDs = []
    parameters = []
    count = None
    publications_formatted = []
    from urllib import quote

    # link building blocks, defined up front so they exist even when there
    # are no control numbers or no dependent records
    prefix_pattern = "<a href='" + CFG_SITE_URL + "%s" + "'>"
    postfix = "</a>"

    ## for every control number that this author has, find all the connected records for each one
    for control_no in control_nos:
        for ctrl_number_field_numbers in CFG_BIBAUTHORITY_RECORD_AUTHOR_CONTROL_NUMBER_FIELDS:
            parameters.append(ctrl_number_field_numbers + ":" + control_no.replace(" ", ""))
        recIDs = [x for x in get_dependent_records_for_control_no(control_no) if x not in previous_recIDs]
        # remember these recIDs so records shared by several control numbers are not counted twice
        previous_recIDs.extend(recIDs)
        count = len(recIDs)
        # if we have dependent records, provide a link to them
        if count:
            url_str = ''
            # print as many of the author's publications as CFG_BIBAUTHORITY_PUBLICATION_VIEW_LIMIT allows
            for i in range(min(count, CFG_BIBAUTHORITY_PUBLICATION_VIEW_LIMIT)):
                title = get_fieldvalues(recIDs[i], "245__a")
                if title:
                    url_str = "/record/" + str(recIDs[i])
                    prefix = prefix_pattern % url_str
                    publications_formatted.append(prefix + title[0] + postfix)

    title = "<strong>" + _("Publication(s)") + "</strong>"
    if publications_formatted:
        content = "<ul><li>" + "</li><li> ".join(publications_formatted) + "</li></ul>"
    else:
        content = "<strong style='color:red'>Missing !</strong>"

    p_val = quote(" or ".join(parameters))
    # include one "&c=" parameter for bibliographic records
    # and one "&c=" parameter for authority records
    url_str = "/search" + \
              "?p=" + p_val + \
              "&c=" + quote(CFG_SITE_NAME) + \
              "&c=" + CFG_BIBAUTHORITY_AUTHORITY_COLLECTION_NAME + \
              "&sc=1" + \
              "&ln=" + bfo.lang
    prefix = prefix_pattern % url_str
    content += prefix + "See all " + str(count) + " publications..." + postfix

    return "<p>" + title + ": " + content + "</p>"
Example #17
def xapian_add_all(lower_recid, upper_recid):
    """
    Adds the relevant field values of all records from the lower recid to the upper one to Xapian.
    It preserves the fulltext information.
    """
    xapian_init_databases()
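    # Each field below is fetched defensively: a record may lack the MARC
    # field, or the stored bytes may not decode as UTF-8; in either case an
    # empty string is indexed instead of failing the whole range.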
    for recid in range(lower_recid, upper_recid + 1):
        try:
            abstract = unicode(
                get_fieldvalues(recid, CFG_MARC_ABSTRACT)[0], 'utf-8')
        except:
            abstract = ""
        xapian_add(recid, "abstract", abstract)

        try:
            first_author = get_fieldvalues(recid, CFG_MARC_AUTHOR_NAME)[0]
            additional_authors = reduce(
                lambda x, y: x + " " + y,
                get_fieldvalues(recid, CFG_MARC_ADDITIONAL_AUTHOR_NAME), '')
            author = unicode(first_author + " " + additional_authors, 'utf-8')
        except:
            author = ""
        xapian_add(recid, "author", author)

        try:
            bibrecdocs = BibRecDocs(recid)
            fulltext = unicode(bibrecdocs.get_text(), 'utf-8')
        except:
            fulltext = ""
        xapian_add(recid, "fulltext", fulltext)

        try:
            keyword = unicode(
                reduce(lambda x, y: x + " " + y,
                       get_fieldvalues(recid, CFG_MARC_KEYWORD), ''), 'utf-8')
        except:
            keyword = ""
        xapian_add(recid, "keyword", keyword)

        try:
            title = unicode(get_fieldvalues(recid, CFG_MARC_TITLE)[0], 'utf-8')
        except:
            title = ""
        xapian_add(recid, "title", title)
Example #22
def get_field_content_in_utf8(recid, field, tag_dict, separator=' '):
    """
    Returns the content of a field, made up of its tags' values
    concatenated into a UTF-8 string.
    """
    content = ''
    try:
        values = []
        for tag in tag_dict[field]:
            values.extend(get_fieldvalues(recid, tag))
        content = unicode(separator.join(values), 'utf-8')
    except:
        pass
    return content
Example #24
def find_book(citation_element):
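    """Find the record of a cited book by its title, falling back to the
    publication year (269__c) to disambiguate multiple title matches.

    Returns a collection holding the single matching recid, or an empty
    list if no unambiguous match is found.
    """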
    books_recids = get_collection_reclist('Books')
    search_string = citation_element['title']
    recids = intbitset(get_recids_matching_query(search_string, 'title'))
    recids &= books_recids
    if len(recids) == 1:
        return recids

    if 'year' in citation_element:
        for recid in recids:
            year_tags = get_fieldvalues(recid, '269__c')
            for tag in year_tags:
                if tag == citation_element['year']:
                    return [recid]

    return []
Example #27
def bst_openaire_check_rights():
    """
    Tasklet to verify access rights consistency.
    """
    restrictions = {
        'cc0': '',
        'openAccess': '',
        'closedAccess': 'status: closedAccess',
        'restrictedAccess': 'status: restrictedAccess',
        'embargoedAccess': 'firerole: deny until "%(date)s"\nallow any',
    }
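    # Maps each 542__l access-rights value to the document status (or
    # firerole rule, for embargoes) that every file of a matching record
    # is expected to carry.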

    errors = []

    keys = dict(current_app.config['CFG_ACCESS_RIGHTS_KEYS']).keys()

    for access_rights in keys:
        write_message(
            "Checking records with access rights '%s'" % access_rights)
        recids = search_pattern(p=access_rights, f="542__l")

        for r in recids:
            date = ''
            if access_rights == 'embargoedAccess':
                try:
                    date = get_fieldvalues(r, "942__a")[0]
                except IndexError:
                    raise Exception(
                        "Embargoed record %s is missing embargo date in 942__a"
                        % r
                    )
            expected_status = restrictions[access_rights] % {'date': date}

            brd = BibRecDocs(r)
            for d in brd.list_bibdocs():
                real_status = d.get_status()
                if real_status != expected_status:
                    d.set_status(expected_status)
                    write_message(
                        "Fixed record %s with wrong status. From: %s To: %s" %
                        (r, real_status, expected_status))

    for e in errors:
        write_message(e)
Example #30
def send_user_commit_notification_email(userinfo, ticket):
    '''
    Sends commit notification email to RT system
    '''
    # send eMail to RT
    mailcontent = []
    m = mailcontent.append
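    # shorthand: m("...") appends a line to the mail body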
    m("A user committed a change through the web interface.")
    m("User Information:")

    for k, v in iteritems(userinfo):
        if v:
            m("    %s: %s" % (k, v))

    m("\nChanges:\n")

    for t in ticket:
        m(" --- <start> --- \n")
        for k, v in iteritems(t):
            m("    %s: %s \n" % (str(k), str(v)))
            if k == 'bibref':
                try:
                    br = int(v.split(',')[1])
                    m("        Title: %s\n" % search_engine.get_fieldvalues(br, "245__a"))
                except (TypeError, ValueError, IndexError):
                    pass
        m(" --- <end> --- \n")

    if ticket and mailcontent:
        sender = CFG_BIBAUTHORID_AUTHOR_TICKET_ADMIN_EMAIL
        send_email(sender,
                   CFG_BIBAUTHORID_AUTHOR_TICKET_ADMIN_EMAIL,
                   subject="[Author] NO ACTIONS NEEDED. Changes performed by SSO user.",
                   content="\n".join(mailcontent))

    return True
Example #31
def render_hepdata_dataset_html(dataset, recid, seq, display_link=True):
    """ Rendering a single dataset
    @param display_link: Indicates if a link to the data record should be displayed
    @type display_link: boolean
    """
    from invenio.legacy.search_engine import get_fieldvalues

    should_expand_table = len(dataset.data) > 0

    # calculating the table width

    c = []  # collecting parts of the output
    # Fixing identifiers and classes typical for this particular dataset
    args = {
        "data_layer_class" : ("hepdata_data_%i" % (seq, )),
        "plots_layer_class" : ("hepdata_plots_%i" % (seq, )),
        "data_expander_id" : ("hepdata_expander_%i" % (seq, )),
        "masterplot_layer_class" : ("hepdata_masterplot_layer_%i" % (seq,)),
        "masterplot_expander_id" : ("hepdata_masterplot_expander_%i" % (seq,)),
        "plots_rowspan": len(dataset.data),
        "masterplot_rowspan": len(dataset.data_qualifiers) + 3
        }

    args["collapse_message_masterplot"] = "&#8595;&#8595;&#8595;Hide&#8595;&#8595;&#8595;"
    args["expand_message_masterplot"] = "&#8593;&#8593;&#8593;Plot&#8593;&#8593;&#8593;"

    args["onclick_code_masterplot_expand"] = "expandCollapseDataPlots(this.parentNode.parentNode.parentNode.parentNode, '%(masterplot_layer_class)s', '%(plots_layer_class)s', '%(data_layer_class)s', '%(masterplot_expander_id)s', '%(collapse_message_masterplot)s', '%(expand_message_masterplot)s');" % args

    args["collapse_message_moredata"] = "&#8593;&#8593;&#8593;Collapse&#8593;&#8593;&#8593;"
    args["expand_message_moredata"] = "&#8595;&#8595;&#8595;Expand&#8595;&#8595;&#8595;"

    args["onclick_code_moredata_expand"] = "return expandCollapseDataPlots(this.parentNode.parentNode.parentNode.parentNode, '%(data_layer_class)s','%(plots_layer_class)s', '%(masterplot_layer_class)s', '%(data_expander_id)s', '%(collapse_message_moredata)s', '%(expand_message_moredata)s');" % args


    args["expander_colspan"] = dataset.num_columns + 2 # table_width + 2
    args["plots_code"] = render_plots_page(dataset, recid, seq)
    multiplot_url = get_hepdatamultiplot_image_url(recid, dataset)
    if multiplot_url:
        args["multiplot_url"] = multiplot_url

    # rendering the HTML code

    c.append("<div style=\"background-color: #ececec; padding:10px;\">")
    # baseurl = get_hepdata_link(recid)
    # c.append("<h3><a href=\"%s/d%i\">%s</a></h3>" % (baseurl, seq, dataset.name, ))
    for fmt in dataset.additional_files:
        c.append("<a href=\"%s/%s\">%s</a>" % (CFG_HEPDATA_URL, fmt[0], fmt[1]))

    dataset.comments = dataset.comments.strip()
    c.append("<br />")
    c.append("<b>Description: </b> " + dataset.comments + "<br />")
    c.append("<br />")

    publisher = get_fieldvalues(dataset.recid, '520__9')

    link_txt = "Go to the record"
    if display_link:
        c.append("<a href=\"%s/%s/%s\">%s</a>" % (CFG_BASE_URL, CFG_SITE_RECORD, str(dataset.recid), link_txt))

    temporary = get_fieldvalues(dataset.recid, '500__a')
    if temporary:
        temporary = temporary[0]

    if publisher[0] == 'HEPDATA' and temporary != "* Temporary entry *":
        c.append("<div class=\"hepdataTablePlaceholder\">")
        c.append("<table cellpadding=\"0\" cellspacing=\"0\" class=\"hepdataTable\">")

        # rendering files links
        plain_file_url = get_fieldvalues(dataset.recid, '8564_u')
        if plain_file_url:
            c.append("<tr><td colspan=\"%(colspan)s\" style=\"text-align: left;\"> <a href=\"%(plain_file_url)s\"> <img src=\"%(site_url)s/img/file-icon-text-15x20.gif\"></img><br> Plain</td>" % {
                "site_url" : CFG_BASE_URL,
                "plain_file_url" : plain_file_url[0],
                "colspan" : str(dataset.num_columns)
                })

            c.append("""<td rowspan="%(rowspan)i" class="expanderTableCell masterPlotExpanderTableCell">""" \
                         % {"rowspan" :  len(dataset.data_qualifiers) + 3})
            if multiplot_url:
                c.append("""<p class="expander masterPlotExpander" onclick="%(onclick_code_masterplot_expand)s" id="%(masterplot_expander_id)s"><a>%(expand_message_masterplot)s</a></p>""" \
                             % args)
            c.append("</td>")
            c.append("<td class=\"masterplot_cell\" rowspan=\"%(masterplot_rowspan)s\"><div class=\"%(masterplot_layer_class)s\" style=\"display:none;\">" % args)
            if multiplot_url:
                c.append("<div><img src=\"%(multiplot_url)s\" alt=\"The plot is not available\" class=\"hepdataimg\"></img></div>" % args)

            c.append("</div></td>" % args)
            c.append("</tr>")
        else:
            from invenio.utils.hepdata.api import create_hepdata_ticket
            create_hepdata_ticket(dataset.recid, 'Data missing in 8564_u')

        # rendering column titles
        c.append("<tr>")
        for title in dataset.column_titles:
            title_str = ""

            strip_str = html_strip(title["content"])
            if strip_str == ":":
                strip_str = ""
            additional_class = "hepdataTableTitleLayer"
            try:
                title_str = "$" + data_qualifier_to_LateX(strip_str) + "$"
            except:
                title_str = strip_str
            if title_str in ("", "$$"):
                title_str = ""
                additional_class = "hepdataTableEmptyTitleLayer"
            c.append("<th colspan=\"%i\" class=\"hepdataColumnHeader\"><div class=\"%s\">%s</div></th>" % (title["colspan"], additional_class, title_str))
        c.append("</tr>")

        for data_line in dataset.data_qualifiers:
            c.append("<tr>")
            for data in data_line:
                qualifier_string = ""

                # stripping from spaces and single strings having only ":" sign
                strip_str = html_strip(data["content"])
                if strip_str == ":":
                    strip_str = ""
                additional_class = "hepdataQualifierLayer"
                try:
                    qualifier_string = "$" + data_qualifier_to_LateX(strip_str) + "$"

                except Exception:
                    qualifier_string = strip_str

                if qualifier_string in ("", "$$"):
                    qualifier_string = ""
                    additional_class = "hepdataEmptyQualifierLayer"
                c.append("<td colspan=\"%i\" class=\"hepdataTableQualifierCell\"><div class=\"%s\">%s</div></td>" % ( \
                        data["colspan"],
                        additional_class,
                        qualifier_string, ))
            c.append("</tr>")
        c.append("</td>")
        c.append("</tr>")


        c.append("<tr>")
        for header in dataset.column_headers:
            header_str = ""
            try:
                header_str = "$" + data_qualifier_to_LateX(header["content"]) + "$"
            except Exception:
                header_str = header["content"]

            c.append("<th colspan=\"%i\" class=\"hepdataColumnHeader\"><div class=\"hepdataTableHeaderLayer\">%s</div></th>" % (header["colspan"],
                                                     header_str))
Example #32
def process_batch_job(batch_job_file):
    """ Processes a batch job description dictionary

    @param batch_job_file: a fullpath to a batch job file
    @type batch_job_file: string
    @return: 1 if the process was successful, 0 if not
    @rtype: int
    """

    def upload_marcxml_file(marcxml):
        """ Creates a temporary marcxml file and sends it to bibupload
        """
        xml_filename = 'bibencode_'+ str(batch_job['recid']) + '_' + str(uuid.uuid4()) + '.xml'
        xml_filename = os.path.join(invenio.config.CFG_TMPSHAREDDIR, xml_filename)
        xml_file = open(xml_filename, 'w')
        xml_file.write(marcxml)
        xml_file.close()
        targs = ['-c', xml_filename]
        task_low_level_submission('bibupload', 'bibencode', *targs)

    #---------#
    # GENERAL #
    #---------#

    _task_write_message("----------- Handling Master -----------")

    ## Check the validity of the batch file here
    batch_job = json_decode_file(batch_job_file)

    ## Sanitise batch description and raise errors
    batch_job = sanitise_batch_job(batch_job)

    ## Check if the record exists
    if record_exists(batch_job['recid']) < 1:
        raise Exception("Record not found")

    recdoc = BibRecDocs(batch_job['recid'])

    #--------------------#
    # UPDATE FROM MASTER #
    #--------------------#

    ## We want to add new stuff to the video's record, using the master as input
    if getval(batch_job, 'update_from_master'):
        found_master = False
        bibdocs = recdoc.list_bibdocs()
        for bibdoc in bibdocs:
            bibdocfiles = bibdoc.list_all_files()
            for bibdocfile in bibdocfiles:
                comment = bibdocfile.get_comment()
                description = bibdocfile.get_description()
                subformat = bibdocfile.get_subformat()
                m_comment = getval(batch_job, 'bibdoc_master_comment', comment)
                m_description = getval(batch_job, 'bibdoc_master_description', description)
                m_subformat = getval(batch_job, 'bibdoc_master_subformat', subformat)
                if (comment == m_comment and
                    description == m_description and
                    subformat == m_subformat):
                    found_master = True
                    batch_job['input'] = bibdocfile.get_full_path()
                    ## Get the aspect ratio from the record
                    try:
                        ## Assumes pbcore metadata mapping
                        batch_job['aspect'] = get_fieldvalues(batch_job['recid'], CFG_BIBENCODE_ASPECT_RATIO_MARC_FIELD)[0]
                    except IndexError:
                        pass
                    break
            if found_master:
                break
        if not found_master:
            _task_write_message("Video master for record %d not found"
                          % batch_job['recid'])
            task_update_progress("Video master for record %d not found"
                                 % batch_job['recid'])
            ## Maybe send an email?
            return 1

    ## Clean the job to do no upscaling etc
    if getval(batch_job, 'assure_quality'):
        batch_job = clean_job_for_quality(batch_job)

    global _BATCH_STEPS
    _BATCH_STEPS = len(batch_job['jobs'])

    ## Generate the docname from the input filename's name or given name
    bibdoc_video_docname, bibdoc_video_extension = decompose_file(batch_job['input'])[1:]
    if not bibdoc_video_extension or getval(batch_job, 'bibdoc_master_extension'):
        bibdoc_video_extension = getval(batch_job, 'bibdoc_master_extension')
    if getval(batch_job, 'bibdoc_master_docname'):
        bibdoc_video_docname = getval(batch_job, 'bibdoc_master_docname')

    write_message("Creating BibDoc for %s" % bibdoc_video_docname)
    ## If the bibdoc exists, receive it
    if bibdoc_video_docname in recdoc.get_bibdoc_names():
        bibdoc_video = recdoc.get_bibdoc(bibdoc_video_docname)
    ## Create a new bibdoc if it does not exist
    else:
        bibdoc_video = recdoc.add_bibdoc(docname=bibdoc_video_docname)

    ## Get the directory of the newly created bibdoc to copy stuff there
    bibdoc_video_directory = bibdoc_video.get_base_dir()

    #--------#
    # MASTER #
    #--------#
    if not getval(batch_job, 'update_from_master'):
        if getval(batch_job, 'add_master'):
            ## Generate the right name for the master
            ## The master should be hidden first and then renamed
            ## when it is really available
            ## !!! FIX !!!
            _task_write_message("Adding %s master to the BibDoc"
                          % bibdoc_video_docname)
            master_format = compose_format(
                                    bibdoc_video_extension,
                                    getval(batch_job, 'bibdoc_master_subformat', 'master')
                                    )
            ## If a file of the same format is there, something is wrong, remove it!
            ## it might be caused by a previous corrupted submission etc.
            if bibdoc_video.format_already_exists_p(master_format):
                bibdoc_video.delete_file(master_format, 1)
            bibdoc_video.add_file_new_format(
                    batch_job['input'],
                    version=1,
                    description=getval(batch_job, 'bibdoc_master_description'),
                    comment=getval(batch_job, 'bibdoc_master_comment'),
                    docformat=master_format
                    )

    #-----------#
    # JOBS LOOP #
    #-----------#

    return_code = 1
    global _BATCH_STEP

    for job in batch_job['jobs']:

        _task_write_message("----------- Job %s of %s -----------"
                           % (_BATCH_STEP, _BATCH_STEPS))

        ## Try to substitute docname with master docname
        if getval(job, 'bibdoc_docname'):
            job['bibdoc_docname'] = Template(job['bibdoc_docname']).safe_substitute({'bibdoc_master_docname': bibdoc_video_docname})

        #-------------#
        # TRANSCODING #
        #-------------#

        if job['mode'] == 'encode':

            ## Skip the job if assure_quality is not set and marked as fallback
            if not getval(batch_job, 'assure_quality') and getval(job, 'fallback'):
                continue

            if getval(job, 'profile'):
                profile = get_encoding_profile(job['profile'])
            else:
                profile = None
            ## We need an extension defined for the video container
            bibdoc_video_extension = getval(job, 'extension',
                                            getval(profile, 'extension'))
            if not bibdoc_video_extension:
                raise Exception("No container/extension defined")
            ## Get the docname and subformat
            bibdoc_video_subformat = getval(job, 'bibdoc_subformat')
            bibdoc_slave_video_docname = getval(job, 'bibdoc_docname', bibdoc_video_docname)
            ## The subformat is incompatible with ffmpeg's naming convention,
            ## so we encode without it and rename the file afterwards
            bibdoc_video_fullpath = compose_file(
                                                 bibdoc_video_directory,
                                                 bibdoc_video_extension
                                                 )
            _task_write_message("Transcoding %s to %s;%s" % (bibdoc_slave_video_docname,
                                bibdoc_video_extension,
                                bibdoc_video_subformat))
            ## We encode now directly into the bibdocs directory
            encoding_result = encode_video(
                 input_file=batch_job['input'],
                 output_file=bibdoc_video_fullpath,
                 acodec=getval(job, 'audiocodec'),
                 vcodec=getval(job, 'videocodec'),
                 abitrate=getval(job, 'audiobitrate'),
                 vbitrate=getval(job, 'videobitrate'),
                 resolution=getval(job, 'resolution'),
                 passes=getval(job, 'passes', 1),
                 special=getval(job, 'special'),
                 specialfirst=getval(job, 'specialfirst'),
                 specialsecond=getval(job, 'specialsecond'),
                 metadata=getval(job, 'metadata'),
                 width=getval(job, 'width'),
                 height=getval(job, 'height'),
                 aspect=getval(batch_job, 'aspect'), # Aspect for every job
                 profile=getval(job, 'profile'),
                 update_fnc=_task_update_overall_status,
                 message_fnc=_task_write_message
                 )
            return_code &= encoding_result
            ## only on success
            if encoding_result:
                ## Rename it, adding the subformat
                os.rename(bibdoc_video_fullpath,
                          compose_file(bibdoc_video_directory,
                                       bibdoc_video_extension,
                                       bibdoc_video_subformat,
                                       1,
                                       bibdoc_slave_video_docname)
                          )
                #bibdoc_video._build_file_list()
                bibdoc_video.touch()
                bibdoc_video._sync_to_db()
                bibdoc_video_format = compose_format(bibdoc_video_extension,
                                                     bibdoc_video_subformat)
                if getval(job, 'bibdoc_comment'):
                    bibdoc_video.set_comment(getval(job, 'bibdoc_comment'),
                                              bibdoc_video_format)
                if getval(job, 'bibdoc_description'):
                    bibdoc_video.set_description(getval(job, 'bibdoc_description'),
                                                 bibdoc_video_format)

        #------------#
        # EXTRACTION #
        #------------#

        # if there are multiple extraction jobs, all the produced files
        # with the same name will be in the same bibdoc! Make sure that
        # you use different subformats or docname templates to avoid
        # conflicts.

        if job['mode'] == 'extract':
            if getval(job, 'profile'):
                profile = get_extract_profile(job['profile'])
            else:
                profile = {}
            bibdoc_frame_subformat = getval(job, 'bibdoc_subformat')
            _task_write_message("Extracting frames to temporary directory")
            tmpdir = invenio.config.CFG_TMPDIR + "/" + str(uuid.uuid4())
            os.mkdir(tmpdir)
            #Move this to the batch description
            bibdoc_frame_docname = getval(job, 'bibdoc_docname', bibdoc_video_docname)
            tmpfname = (tmpdir + "/" + bibdoc_frame_docname + '.'
                        + getval(profile, 'extension',
                        getval(job, 'extension', 'jpg')))
            extraction_result = extract_frames(input_file=batch_job['input'],
                           output_file=tmpfname,
                           size=getval(job, 'size'),
                           positions=getval(job, 'positions'),
                           numberof=getval(job, 'numberof'),
                           width=getval(job, 'width'),
                           height=getval(job, 'height'),
                           aspect=getval(batch_job, 'aspect'),
                           profile=getval(job, 'profile'),
                           update_fnc=_task_update_overall_status,
                           )
            return_code &= extraction_result

            ## only on success:
            if extraction_result:
                ## For every filename in the temporary directory, create a
                ## bibdoc that contains all sizes of the extracted frame
                files = os.listdir(tmpdir)
                for filename in files:
                    ## The docname was altered by BibEncode extract through substitution
                    ## Retrieve it from the filename again
                    bibdoc_frame_docname, bibdoc_frame_extension = os.path.splitext(filename)
                    _task_write_message("Creating new bibdoc for %s" % bibdoc_frame_docname)
                    ## If the bibdoc exists, receive it
                    if bibdoc_frame_docname in recdoc.get_bibdoc_names():
                        bibdoc_frame = recdoc.get_bibdoc(bibdoc_frame_docname)
                    ## Create a new bibdoc if it does not exist
                    else:
                        bibdoc_frame = recdoc.add_bibdoc(docname=bibdoc_frame_docname)

                    ## The filename including path from tmpdir
                    fname = os.path.join(tmpdir, filename)

                    bibdoc_frame_format = compose_format(bibdoc_frame_extension, bibdoc_frame_subformat)
                    ## Same as with the master: if the format already exists,
                    ## overwrite it, because something went wrong before
                    if bibdoc_frame.format_already_exists_p(bibdoc_frame_format):
                        bibdoc_frame.delete_file(bibdoc_frame_format, 1)
                    _task_write_message("Adding %s jpg;%s to BibDoc"
                                  % (bibdoc_frame_docname,
                                     getval(job, 'bibdoc_subformat')))
                    bibdoc_frame.add_file_new_format(
                                    fname,
                                    version=1,
                                    description=getval(job, 'bibdoc_description'),
                                    comment=getval(job, 'bibdoc_comment'),
                                    docformat=bibdoc_frame_format)
            ## Remove the temporary folders
            _task_write_message("Removing temporary directory")
            shutil.rmtree(tmpdir)

        _BATCH_STEP = _BATCH_STEP + 1

    #-----------------#
    # FIX BIBDOC/MARC #
    #-----------------#

    _task_write_message("----------- Handling MARCXML -----------")

    ## Fix the BibDoc for all the videos previously created
    _task_write_message("Updating BibDoc of %s" % bibdoc_video_docname)
    bibdoc_video._build_file_list()

    ## Fix the MARC
    _task_write_message("Fixing MARC")
    cli_fix_marc({}, [batch_job['recid']], False)

    if getval(batch_job, 'collection'):
        ## Make the record visible by moving it into the target collection
        marcxml = ("<record><controlfield tag=\"001\">%d</controlfield>"
                   "<datafield tag=\"980\" ind1=\" \" ind2=\" \">"
                   "<subfield code=\"a\">%s</subfield></datafield></record>"
                   ) % (batch_job['recid'], batch_job['collection'])
        upload_marcxml_file(marcxml)
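        ## For illustration only: with a hypothetical recid of 123 and a
        ## collection of "VIDEOS", the uploaded snippet would read:
        ##   <record><controlfield tag="001">123</controlfield>
        ##   <datafield tag="980" ind1=" " ind2=" ">
        ##   <subfield code="a">VIDEOS</subfield></datafield></record>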

    #---------------------#
    # ADD MASTER METADATA #
    #---------------------#

    if getval(batch_job, 'add_master_metadata'):
        _task_write_message("Adding master metadata")
        pbcore = pbcore_metadata(input_file = getval(batch_job, 'input'),
                                 pbcoreIdentifier = batch_job['recid'],
                                 aspect_override = getval(batch_job, 'aspect'))
        marcxml = format(pbcore, CFG_BIBENCODE_PBCORE_MARC_XSLT)
        upload_marcxml_file(marcxml)

    #------------------#
    # ADD MARC SNIPPET #
    #------------------#

    if getval(batch_job, 'marc_snippet'):
        marc_snippet = open(getval(batch_job, 'marc_snippet'))
        marcxml = marc_snippet.read()
        marc_snippet.close()
        upload_marcxml_file(marcxml)

    #--------------#
    # DELETE INPUT #
    #--------------#

    if getval(batch_job, 'delete_input'):
        _task_write_message("Deleting input file")
        # only if successful
        if return_code:
            # only if input matches pattern
            if getval(batch_job, 'delete_input_pattern', '') in getval(batch_job, 'input'):
                try:
                    os.remove(getval(batch_job, 'input'))
                except OSError:
                    pass

    #--------------#
    # NOTIFICATION #
    #--------------#

    ## Send Notification emails on errors
    if not return_code:
        if getval(batch_job, 'notify_user'):
            _notify_error_user(getval(batch_job, 'notify_user'),
                               getval(batch_job, 'submission_filename', batch_job['input']),
                               getval(batch_job, 'recid'),
                               getval(batch_job, 'submission_title', ""))
            _task_write_message("Notify user because of an error")
        if getval(batch_job, 'notify_admin'):
            _task_write_message("Notify admin because of an error")
            if isinstance(getval(batch_job, 'notify_admin'), str):
                _notify_error_admin(batch_job,
                                    getval(batch_job, 'notify_admin'))

            else:
                _notify_error_admin(batch_job)
    else:
        if getval(batch_job, 'notify_user'):
            _task_write_message("Notify user because of success")
            _notify_success_user(getval(batch_job, 'notify_user'),
                               getval(batch_job, 'submission_filename', batch_job['input']),
                               getval(batch_job, 'recid'),
                               getval(batch_job, 'submission_title', ""))
    return 1
def format_element(bfo):
    """ Prints the control number of an author authority record in HTML.
    By default prints brief version.

    @param brief: whether the 'brief' rather than the 'detailed' format
    @type brief: 'yes' or 'no'
    """

    from urllib import quote
    from invenio.messages import gettext_set_language
    _ = gettext_set_language(bfo.lang)  # load the right message language

    control_nos = [d['a'] for d in bfo.fields('035__') if d['a']]
    previous_recIDs = []
    parameters = []
    count = None
    publications_formatted = []
    # link markup, reused both inside the loop and for the final "See all" link
    prefix_pattern = "<a href='" + CFG_SITE_URL + "%s" + "'>"
    postfix = "</a>"
    ## for every control number that this author has, find all the connected records for each one
    for control_no in control_nos:
        for ctrl_number_field_numbers in CFG_BIBAUTHORITY_RECORD_AUTHOR_CONTROL_NUMBER_FIELDS:
            parameters.append(ctrl_number_field_numbers + ":" +
                              control_no.replace(" ", ""))
        recIDs = [
            x for x in get_dependent_records_for_control_no(control_no)
            if x not in previous_recIDs
        ]
        previous_recIDs.extend(recIDs)
        count = len(recIDs)
        # if we have dependent records, provide a link to them
        if count:
            url_str = ''
            # print as many of the author's publications as the CFG_BIBAUTHORITY_PUBLICATION_VIEW_LIMIT allows
            for i in range(min(count, CFG_BIBAUTHORITY_PUBLICATION_VIEW_LIMIT)):
                title = get_fieldvalues(recIDs[i], "245__a")
                if title:
                    url_str = "/record/" + str(recIDs[i])
                    prefix = prefix_pattern % url_str
                    publications_formatted.append(prefix + title[0] + postfix)

    title = "<strong>" + _("Publication(s)") + "</strong>"
    if publications_formatted:
        content = "<ul><li>" + "</li><li> ".join(
            publications_formatted) + "</li></ul>"
    else:
        content = "<strong style='color:red'>Missing !</strong>"

    p_val = quote(" or ".join(parameters))
    # include "&c=" parameter for bibliographic records
    # and one "&c=" parameter for authority records
    url_str = ("/search"
               "?p=" + p_val +
               "&c=" + quote(CFG_SITE_NAME) +
               "&c=" + CFG_BIBAUTHORITY_AUTHORITY_COLLECTION_NAME +
               "&sc=1"
               "&ln=" + bfo.lang)
    if count:
        prefix = prefix_pattern % url_str
        content += prefix + "See all " + str(count) + " publications..." + postfix

    return "<p>" + title + ": " + content + "</p>"
Example #34
def render_hepdata_dataset_html(dataset, recid, seq, display_link=True):
    """ Rendering a single dataset
    @param display_link: Indicates if a link to the data record should be displayed
    @type display_link: boolean
    """
    from invenio.legacy.search_engine import get_fieldvalues

    should_expand_table = len(dataset.data) > 0

    # calculating the table width

    c = []  # collecting parts of the output
    # Fixing identifiers and classes typical for this particular dataset
    args = {
        "data_layer_class": ("hepdata_data_%i" % (seq,)),
        "plots_layer_class": ("hepdata_plots_%i" % (seq,)),
        "data_expander_id": ("hepdata_expander_%i" % (seq,)),
        "masterplot_layer_class": ("hepdata_masterplot_layer_%i" % (seq,)),
        "masterplot_expander_id": ("hepdata_masterplot_expander_%i" % (seq,)),
        "plots_rowspan": len(dataset.data),
        "masterplot_rowspan": len(dataset.data_qualifiers) + 3,
    }

    args["collapse_message_masterplot"] = "&#8595;&#8595;&#8595;Hide&#8595;&#8595;&#8595;"
    args["expand_message_masterplot"] = "&#8593;&#8593;&#8593;Plot&#8593;&#8593;&#8593;"

    args["onclick_code_masterplot_expand"] = (
        "expandCollapseDataPlots(this.parentNode.parentNode.parentNode.parentNode, '%(masterplot_layer_class)s', '%(plots_layer_class)s', '%(data_layer_class)s', '%(masterplot_expander_id)s', '%(collapse_message_masterplot)s', '%(expand_message_masterplot)s');"
        % args
    )

    args["collapse_message_moredata"] = "&#8593;&#8593;&#8593;Collapse&#8593;&#8593;&#8593;"
    args["expand_message_moredata"] = "&#8595;&#8595;&#8595;Expand&#8595;&#8595;&#8595;"

    args["onclick_code_moredata_expand"] = (
        "return expandCollapseDataPlots(this.parentNode.parentNode.parentNode.parentNode, '%(data_layer_class)s','%(plots_layer_class)s', '%(masterplot_layer_class)s', '%(data_expander_id)s', '%(collapse_message_moredata)s', '%(expand_message_moredata)s');"
        % args
    )

    args["expander_colspan"] = dataset.num_columns + 2  # table_width + 2
    args["plots_code"] = render_plots_page(dataset, recid, seq)
    multiplot_url = get_hepdatamultiplot_image_url(recid, dataset)
    if multiplot_url:
        args["multiplot_url"] = multiplot_url

    # rendering the HTML code

    c.append('<div style="background-color: #ececec; padding:10px;">')
    # baseurl = get_hepdata_link(recid)
    # c.append("<h3><a href=\"%s/d%i\">%s</a></h3>" % (baseurl, seq, dataset.name, ))
    for fmt in dataset.additional_files:
        c.append('<a href="%s/%s">%s</a>' % (CFG_HEPDATA_URL, fmt[0], fmt[1]))

    dataset.comments = dataset.comments.strip()
    c.append("<br />")
    c.append("<b>Description: </b> " + dataset.comments + "<br />")
    c.append("<br />")

    publisher = get_fieldvalues(dataset.recid, "520__9")

    link_txt = "Go to the record"
    if display_link:
        c.append('<a href="%s/%s/%s">%s</a>' % (CFG_BASE_URL, CFG_SITE_RECORD, str(dataset.recid), link_txt))

    temporary = get_fieldvalues(dataset.recid, "500__a")
    if temporary:
        temporary = temporary[0]

    if publisher[0] == "HEPDATA" and temporary != "* Temporary entry *":
        c.append('<div class="hepdataTablePlaceholder">')
        c.append('<table cellpadding="0" cellspacing="0" class="hepdataTable">')

        # rendering files links
        plain_file_url = get_fieldvalues(dataset.recid, "8564_u")
        if plain_file_url:
            c.append(
                '<tr><td colspan="%(colspan)s" style="text-align: left;"> <a href="%(plain_file_url)s"> <img src="%(site_url)s/img/file-icon-text-15x20.gif"></img><br> Plain</td>'
                % {"site_url": CFG_BASE_URL, "plain_file_url": plain_file_url[0], "colspan": str(dataset.num_columns)}
            )

            c.append(
                """<td rowspan="%(rowspan)i" class="expanderTableCell masterPlotExpanderTableCell">"""
                % {"rowspan": len(dataset.data_qualifiers) + 3}
            )
            if multiplot_url:
                c.append(
                    """<p class="expander masterPlotExpander" onclick="%(onclick_code_masterplot_expand)s" id="%(masterplot_expander_id)s"><a>%(expand_message_masterplot)s</a></p>"""
                    % args
                )
            c.append("</td>")
            c.append(
                '<td class="masterplot_cell" rowspan="%(masterplot_rowspan)s"><div class="%(masterplot_layer_class)s" style="display:none;">'
                % args
            )
            if multiplot_url:
                c.append(
                    '<div><img src="%(multiplot_url)s" alt="The plot is not available" class="hepdataimg"></img></div>'
                    % args
                )

            c.append("</div></td>" % args)
            c.append("</tr>")
        else:
            from invenio.utils.hepdata.api import create_hepdata_ticket

            create_hepdata_ticket(dataset.recid, "Data missing in 8564_u")

        # rendering column titles
        c.append("<tr>")
        for title in dataset.column_titles:
            title_str = ""

            strip_str = html_strip(title["content"])
            if strip_str == ":":
                strip_str = ""
            additional_class = "hepdataTableTitleLayer"
            try:
                title_str = "$" + data_qualifier_to_LateX(strip_str) + "$"
            except Exception:
                title_str = strip_str
            if title_str in ("", "$$"):
                title_str = ""
                additional_class = "hepdataTableEmptyTitleLayer"
            c.append(
                '<th colspan="%i" class="hepdataColumnHeader"><div class="%s">%s</div></th>'
                % (title["colspan"], additional_class, title_str)
            )
        c.append("</tr>")

        for data_line in dataset.data_qualifiers:
            c.append("<tr>")
            for data in data_line:
                qualifier_string = ""

                # strip whitespace and values consisting only of a ":" sign
                strip_str = html_strip(data["content"])
                if strip_str == ":":
                    strip_str = ""
                additional_class = "hepdataQualifierLayer"
                try:
                    qualifier_string = "$" + data_qualifier_to_LateX(strip_str) + "$"

                except Exception:
                    qualifier_string = strip_str

                if qualifier_string in ("", "$$"):
                    qualifier_string = ""
                    additional_class = "hepdataEmptyQualifierLayer"
                c.append(
                    '<td colspan="%i" class="hepdataTableQualifierCell"><div class="%s">%s</div></td>'
                    % (data["colspan"], additional_class, qualifier_string)
                )
            c.append("</tr>")
        c.append("</td>")
        c.append("</tr>")

        c.append("<tr>")
        for header in dataset.column_headers:
            header_str = ""
            try:
                header_str = "$" + data_qualifier_to_LateX(header["content"]) + "$"
            except Exception:
                header_str = header["content"]

            c.append(
                '<th colspan="%i" class="hepdataColumnHeader"><div class="hepdataTableHeaderLayer">%s</div></th>'
                % (header["colspan"], header_str)
            )
Example #35
def process_batch_job(batch_job_file):
    """ Processes a batch job description dictionary

    @param batch_job_file: a fullpath to a batch job file
    @type batch_job_file: string
    @return: 1 if the process was successful, 0 if not
    @rtype: int
    """

    def upload_marcxml_file(marcxml):
        """ Creates a temporary marcxml file and sends it to bibupload
        """
        xml_filename = "bibencode_" + str(batch_job["recid"]) + "_" + str(uuid.uuid4()) + ".xml"
        xml_filename = os.path.join(invenio.config.CFG_TMPSHAREDDIR, xml_filename)
        xml_file = file(xml_filename, "w")
        xml_file.write(marcxml)
        xml_file.close()
        targs = ["-c", xml_filename]
        task_low_level_submission("bibupload", "bibencode", *targs)

    # ---------#
    # GENERAL #
    # ---------#

    _task_write_message("----------- Handling Master -----------")

    ## Check the validity of the batch file here
    batch_job = json_decode_file(batch_job_file)

    ## Sanitise the batch description and raise errors
    batch_job = sanitise_batch_job(batch_job)

    ## Check if the record exists
    if record_exists(batch_job["recid"]) < 1:
        raise Exception("Record not found")

    recdoc = BibRecDocs(batch_job["recid"])

    # --------------------#
    # UPDATE FROM MASTER #
    # --------------------#

    ## We want to add new stuff to the video's record, using the master as input
    if getval(batch_job, "update_from_master"):
        found_master = False
        bibdocs = recdoc.list_bibdocs()
        for bibdoc in bibdocs:
            bibdocfiles = bibdoc.list_all_files()
            for bibdocfile in bibdocfiles:
                comment = bibdocfile.get_comment()
                description = bibdocfile.get_description()
                subformat = bibdocfile.get_subformat()
                m_comment = getval(batch_job, "bibdoc_master_comment", comment)
                m_description = getval(batch_job, "bibdoc_master_description", description)
                m_subformat = getval(batch_job, "bibdoc_master_subformat", subformat)
                if comment == m_comment and description == m_description and subformat == m_subformat:
                    found_master = True
                    batch_job["input"] = bibdocfile.get_full_path()
                    ## Get the aspect ratio from the record
                    try:
                        ## Assumes pbcore metadata mapping
                        batch_job["aspect"] = get_fieldvalues(124, CFG_BIBENCODE_ASPECT_RATIO_MARC_FIELD)[0]
                    except IndexError:
                        pass
                    break
            if found_master:
                break
        if not found_master:
            _task_write_message("Video master for record %d not found" % batch_job["recid"])
            task_update_progress("Video master for record %d not found" % batch_job["recid"])
            ## Maybe send an email?
            return 1

    ## Clean the job to do no upscaling etc
    if getval(batch_job, "assure_quality"):
        batch_job = clean_job_for_quality(batch_job)

    global _BATCH_STEPS
    _BATCH_STEPS = len(batch_job["jobs"])

    ## Generate the docname from the input filename's name or given name
    bibdoc_video_docname, bibdoc_video_extension = decompose_file(batch_job["input"])[1:]
    if not bibdoc_video_extension or getval(batch_job, "bibdoc_master_extension"):
        bibdoc_video_extension = getval(batch_job, "bibdoc_master_extension")
    if getval(batch_job, "bibdoc_master_docname"):
        bibdoc_video_docname = getval(batch_job, "bibdoc_master_docname")

    write_message("Creating BibDoc for %s" % bibdoc_video_docname)
    ## If the bibdoc exists, receive it
    if bibdoc_video_docname in recdoc.get_bibdoc_names():
        bibdoc_video = recdoc.get_bibdoc(bibdoc_video_docname)
    ## Create a new bibdoc if it does not exist
    else:
        bibdoc_video = recdoc.add_bibdoc(docname=bibdoc_video_docname)

    ## Get the directory of the newly created bibdoc to copy stuff there
    bibdoc_video_directory = bibdoc_video.get_base_dir()

    # --------#
    # MASTER #
    # --------#
    if not getval(batch_job, "update_from_master"):
        if getval(batch_job, "add_master"):
            ## Generate the right name for the master
            ## The master should be hidden first and then renamed
            ## when it is really available
            ## !!! FIX !!!
            _task_write_message("Adding %s master to the BibDoc" % bibdoc_video_docname)
            master_format = compose_format(
                bibdoc_video_extension, getval(batch_job, "bibdoc_master_subformat", "master")
            )
            ## If a file of the same format is there, something is wrong, remove it!
            ## it might be caused by a previous corrupted submission etc.
            if bibdoc_video.format_already_exists_p(master_format):
                bibdoc_video.delete_file(master_format, 1)
            bibdoc_video.add_file_new_format(
                batch_job["input"],
                version=1,
                description=getval(batch_job, "bibdoc_master_description"),
                comment=getval(batch_job, "bibdoc_master_comment"),
                docformat=master_format,
            )

    # -----------#
    # JOBS LOOP #
    # -----------#

    return_code = 1
    global _BATCH_STEP

    for job in batch_job["jobs"]:

        _task_write_message("----------- Job %s of %s -----------" % (_BATCH_STEP, _BATCH_STEPS))

        ## Try to substitute docname with master docname
        if getval(job, "bibdoc_docname"):
            job["bibdoc_docname"] = Template(job["bibdoc_docname"]).safe_substitute(
                {"bibdoc_master_docname": bibdoc_video_docname}
            )
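            # e.g. a job docname of "${bibdoc_master_docname}_web" would
            # become "lecture_web" for a master docname of "lecture"
            # (hypothetical names, for illustration only)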

        # -------------#
        # TRANSCODING #
        # -------------#

        if job["mode"] == "encode":

            ## Skip the job if assure_quality is not set and marked as fallback
            if not getval(batch_job, "assure_quality") and getval(job, "fallback"):
                continue

            if getval(job, "profile"):
                profile = get_encoding_profile(job["profile"])
            else:
                profile = None
            ## We need an extension defined for the video container
            bibdoc_video_extension = getval(job, "extension", getval(profile, "extension"))
            if not bibdoc_video_extension:
                raise Exception("No container/extension defined")
            ## Get the docname and subformat
            bibdoc_video_subformat = getval(job, "bibdoc_subformat")
            bibdoc_slave_video_docname = getval(job, "bibdoc_docname", bibdoc_video_docname)
            ## The subformat is incompatible with ffmpeg's naming convention,
            ## so we encode without it and rename the file afterwards
            bibdoc_video_fullpath = compose_file(bibdoc_video_directory, bibdoc_video_extension)
            _task_write_message(
                "Transcoding %s to %s;%s" % (bibdoc_slave_video_docname, bibdoc_video_extension, bibdoc_video_subformat)
            )
            ## We encode now directly into the bibdocs directory
            encoding_result = encode_video(
                input_file=batch_job["input"],
                output_file=bibdoc_video_fullpath,
                acodec=getval(job, "audiocodec"),
                vcodec=getval(job, "videocodec"),
                abitrate=getval(job, "videobitrate"),
                vbitrate=getval(job, "audiobitrate"),
                resolution=getval(job, "resolution"),
                passes=getval(job, "passes", 1),
                special=getval(job, "special"),
                specialfirst=getval(job, "specialfirst"),
                specialsecond=getval(job, "specialsecond"),
                metadata=getval(job, "metadata"),
                width=getval(job, "width"),
                height=getval(job, "height"),
                aspect=getval(batch_job, "aspect"),  # Aspect for every job
                profile=getval(job, "profile"),
                update_fnc=_task_update_overall_status,
                message_fnc=_task_write_message,
            )
            return_code &= encoding_result
            ## only on success
            if encoding_result:
                ## Rename it, adding the subformat
                os.rename(
                    bibdoc_video_fullpath,
                    compose_file(
                        bibdoc_video_directory,
                        bibdoc_video_extension,
                        bibdoc_video_subformat,
                        1,
                        bibdoc_slave_video_docname,
                    ),
                )
                # bibdoc_video._build_file_list()
                bibdoc_video.touch()
                bibdoc_video._sync_to_db()
                bibdoc_video_format = compose_format(bibdoc_video_extension, bibdoc_video_subformat)
                if getval(job, "bibdoc_comment"):
                    bibdoc_video.set_comment(getval(job, "bibdoc_comment"), bibdoc_video_format)
                if getval(job, "bibdoc_description"):
                    bibdoc_video.set_description(getval(job, "bibdoc_description"), bibdoc_video_format)

        # ------------#
        # EXTRACTION #
        # ------------#

        # if there are multiple extraction jobs, all the produced files
        # with the same name will be in the same bibdoc! Make sure that
        # you use different subformats or docname templates to avoid
        # conflicts.

        if job["mode"] == "extract":
            if getval(job, "profile"):
                profile = get_extract_profile(job["profile"])
            else:
                profile = {}
            bibdoc_frame_subformat = getval(job, "bibdoc_subformat")
            _task_write_message("Extracting frames to temporary directory")
            tmpdir = invenio.config.CFG_TMPDIR + "/" + str(uuid.uuid4())
            os.mkdir(tmpdir)
            # Move this to the batch description
            bibdoc_frame_docname = getval(job, "bibdoc_docname", bibdoc_video_docname)
            tmpfname = (
                tmpdir
                + "/"
                + bibdoc_frame_docname
                + "."
                + getval(profile, "extension", getval(job, "extension", "jpg"))
            )
            extraction_result = extract_frames(
                input_file=batch_job["input"],
                output_file=tmpfname,
                size=getval(job, "size"),
                positions=getval(job, "positions"),
                numberof=getval(job, "numberof"),
                width=getval(job, "width"),
                height=getval(job, "height"),
                aspect=getval(batch_job, "aspect"),
                profile=getval(job, "profile"),
                update_fnc=_task_update_overall_status,
            )
            return_code &= extraction_result

            ## only on success:
            if extraction_result:
                ## For every filename in the temporary directory, create a
                ## bibdoc that contains all sizes of the extracted frame
                files = os.listdir(tmpdir)
                for filename in files:
                    ## The docname was altered by BibEncode extract through substitution
                    ## Retrieve it from the filename again
                    bibdoc_frame_docname, bibdoc_frame_extension = os.path.splitext(filename)
                    _task_write_message("Creating new bibdoc for %s" % bibdoc_frame_docname)
                    ## If the bibdoc exists, receive it
                    if bibdoc_frame_docname in recdoc.get_bibdoc_names():
                        bibdoc_frame = recdoc.get_bibdoc(bibdoc_frame_docname)
                    ## Create a new bibdoc if it does not exist
                    else:
                        bibdoc_frame = recdoc.add_bibdoc(docname=bibdoc_frame_docname)

                    ## The filename including path from tmpdir
                    fname = os.path.join(tmpdir, filename)

                    bibdoc_frame_format = compose_format(bibdoc_frame_extension, bibdoc_frame_subformat)
                    ## Same as with the master: if the format already exists,
                    ## overwrite it, because something went wrong before
                    if bibdoc_frame.format_already_exists_p(bibdoc_frame_format):
                        bibdoc_frame.delete_file(bibdoc_frame_format, 1)
                    _task_write_message(
                        "Adding %s jpg;%s to BibDoc" % (bibdoc_frame_docname, getval(job, "bibdoc_subformat"))
                    )
                    bibdoc_frame.add_file_new_format(
                        fname,
                        version=1,
                        description=getval(job, "bibdoc_description"),
                        comment=getval(job, "bibdoc_comment"),
                        docformat=bibdoc_frame_format,
                    )
            ## Remove the temporary folders
            _task_write_message("Removing temporary directory")
            shutil.rmtree(tmpdir)

        _BATCH_STEP = _BATCH_STEP + 1

    # -----------------#
    # FIX BIBDOC/MARC #
    # -----------------#

    _task_write_message("----------- Handling MARCXML -----------")

    ## Fix the BibDoc for all the videos previously created
    _task_write_message("Updating BibDoc of %s" % bibdoc_video_docname)
    bibdoc_video._build_file_list()

    ## Fix the MARC
    _task_write_message("Fixing MARC")
    cli_fix_marc({}, [batch_job["recid"]], False)

    if getval(batch_job, "collection"):
        ## Make the record visible by moving it into the target collection
        marcxml = (
            '<record><controlfield tag="001">%d</controlfield>'
            '<datafield tag="980" ind1=" " ind2=" ">'
            '<subfield code="a">%s</subfield></datafield></record>'
        ) % (batch_job["recid"], batch_job["collection"])
        upload_marcxml_file(marcxml)

    # ---------------------#
    # ADD MASTER METADATA #
    # ---------------------#

    if getval(batch_job, "add_master_metadata"):
        _task_write_message("Adding master metadata")
        pbcore = pbcore_metadata(
            input_file=getval(batch_job, "input"),
            pbcoreIdentifier=batch_job["recid"],
            aspect_override=getval(batch_job, "aspect"),
        )
        marcxml = format(pbcore, CFG_BIBENCODE_PBCORE_MARC_XSLT)
        upload_marcxml_file(marcxml)

    # ------------------#
    # ADD MARC SNIPPET #
    # ------------------#

    if getval(batch_job, "marc_snippet"):
        marc_snippet = open(getval(batch_job, "marc_snippet"))
        marcxml = marc_snippet.read()
        marc_snippet.close()
        upload_marcxml_file(marcxml)

    # --------------#
    # DELETE INPUT #
    # --------------#

    if getval(batch_job, "delete_input"):
        _task_write_message("Deleting input file")
        # only if successful
        if return_code:
            # only if input matches pattern
            if getval(batch_job, "delete_input_pattern", "") in getval(batch_job, "input"):
                try:
                    os.remove(getval(batch_job, "input"))
                except OSError:
                    pass

    # --------------#
    # NOTIFICATION #
    # --------------#

    ## Send Notification emails on errors
    if not return_code:
        if getval(batch_job, "notify_user"):
            _notify_error_user(
                getval(batch_job, "notify_user"),
                getval(batch_job, "submission_filename", batch_job["input"]),
                getval(batch_job, "recid"),
                getval(batch_job, "submission_title", ""),
            )
            _task_write_message("Notify user because of an error")
        if getval(batch_job, "notify_admin"):
            _task_write_message("Notify admin because of an error")
            if type(getval(batch_job, "notify_admin") == type(str())):
                _notify_error_admin(batch_job, getval(batch_job, "notify_admin"))

            else:
                _notify_error_admin(batch_job)
    else:
        if getval(batch_job, "notify_user"):
            _task_write_message("Notify user because of success")
            _notify_success_user(
                getval(batch_job, "notify_user"),
                getval(batch_job, "submission_filename", batch_job["input"]),
                getval(batch_job, "recid"),
                getval(batch_job, "submission_title", ""),
            )
    return 1
Example #36
def Set_RN_From_Sysno(parameters, curdir, form, user_info=None):
    """
    Set the global variable 'rn' to the report number of the record
    identified by 'sysno' (recid) global variable.

    Useful at the MBI step when the user specifies the record to modify
    using the recid instead of the report number.  Since most
    WebSubmit functions rely on the global 'rn' variable, it is
    necessary in these cases to include this function.

    This function MUST be preceded by 'Get_Recid' function.

    To identify the record to update via 'recid' instead of report
    number, one MUST request the recid/sysno on the MBI form using a
    form element named 'SN'.

    Parameters:

         edsrn - file where to write the report number if found

      rep_tags - comma-separated list of tags where the report number
                 can be found. Default is '037__a', '088__a', '021__a'
                 if no value is specified.

  record_search_pattern - this enforces restrictions on which type of
                 documents can be modified via a certain submission
                 interface. If the record_search_pattern is not
                 defined, no restriction will be enforced.  The
                 record_search_pattern can be anything that can be
                 used by search_pattern to search for. Also, one can
                 use variables stored locally, like
                 &lt;comboDEMOJRN&gt; to denote the category or
                 subcategory.
                 Ex:
                    reportnumber:DEMO-&lt;comboDEMOJRN&gt;-*
                    collection:ATLANTISTIMESNEWS
                    reportnumber:DEMO-&lt;comboDEMOJRN&gt;-* | collection:ATLANTISTIMESNEWS

                 As a note, you can test your pattern, using the
                 search engine and see if it retrieves the expected
                 results.

                 WARNING: this check is not applied if the report
                 number has already been written to 'edsrn' file.

    Exceptions raised:
        + InvenioWebSubmitFunctionStop
              - if trying to access unauthorized path to read/write report number;
              - if accessing a recid that does not exist or is deleted;
              - if recid should not be handled by the current submission;
    """
    global rn, sysno
    if not sysno:
        return

    try:
        sysno = int(sysno)
    except (TypeError, ValueError):
        raise InvenioWebSubmitFunctionStop(CFG_ALERT_RECORD_ID_MUST_BE_INT % \
                                           cgi.escape(str(sysno)))

    edsrn = parameters['edsrn']
    path_to_repnum_file = os.path.join(curdir, edsrn)

    if not os.path.abspath(path_to_repnum_file).startswith(curdir):
        # Trying to access invalid path...
        raise InvenioWebSubmitFunctionStop(CFG_ALERT_INVALID_EDSRN_PATH % \
                                           (cgi.escape(path_to_repnum_file),
                                            cgi.escape(CFG_SITE_SUPPORT_EMAIL)))

    if os.path.exists(path_to_repnum_file):
        # Have we already written RN to disk? If so, read from there
        possible_rn = ParamFromFile(path_to_repnum_file)
        if possible_rn.strip():
            # Not empty
            rn = possible_rn
            return

    if record_exists(sysno) != 1:
        # Record does not exist
        raise InvenioWebSubmitFunctionStop(CFG_ALERT_DOCUMENT_NOT_FOUND %
                                           sysno)

    ## Check if the record needs to comply to any restriction.
    ## Basically checks if this record can/should be handled by this submission
    if parameters['record_search_pattern']:
        if not is_record_matching_pattern(parameters['record_search_pattern'],
                                          sysno, curdir):
            # delete the SN file and reset the sysno,
            # because this record is not the right one to be handled by this submission
            os.rename("%s/SN" % curdir, "%s/SN_WRONG" % curdir)
            sysno = ""
            raise InvenioWebSubmitFunctionStop(
                CFG_ALERT_WRONG_RECORD_FOR_THIS_SUBMISSION)

    rn_tags = [tag.strip() for tag in parameters['rep_tags'].split(',') \
               if tag.strip()]
    if not rn_tags:
        rn_tags = CFG_DEFAULT_RN_TAGS

    # Retrieve report number in metadata
    for rn_tag in rn_tags:
        possible_report_numbers = get_fieldvalues(sysno, rn_tag)
        if possible_report_numbers:
            rn = possible_report_numbers[0].strip()
            break

    edsrn = parameters['edsrn']
    path_to_repnum_file = os.path.join(curdir, edsrn)

    if rn and not os.path.exists(path_to_repnum_file):
        # Write report number to specified file
        fp = open(path_to_repnum_file, 'w')
        fp.write(rn)
        fp.close()
Example #37
def Set_RN_From_Sysno(parameters, curdir, form, user_info=None):
    """
    Set the global variable 'rn' to the report number of the record
    identified by 'sysno' (recid) global variable.

    Useful at the MBI step when the user specifies the record to modify
    using the recid instead of the report number.  Since most
    WebSubmit functions rely on the global 'rn' variable, it is
    necessary in these cases to include this function.

    This function MUST be preceded by 'Get_Recid' function.

    To identify the record to update via 'recid' instead of report
    number, one MUST request the recid/sysno on the MBI form using a
    form element named 'SN'.

    Parameters:

         edsrn - file where to write the report number if found

      rep_tags - comma-separated list of tags where the report number
                 can be found. Default is '037__a', '088__a', '021__a'
                 if no value is specified.

  record_search_pattern - this enforces restrictions on which type of
                 documents can be modified via a certain submission
                 interface. If the record_search_pattern is not
                 defined, no restriction will be enforced.  The
                 record_search_pattern can be anything that can be
                 used by search_pattern to search for. Also, one can
                 use variables stored locally, like
                 &lt;comboDEMOJRN&gt; to denote the category or
                 subcategory.
                 Ex:
                    reportnumber:DEMO-&lt;comboDEMOJRN&gt;-*
                    collection:ATLANTISTIMESNEWS
                    reportnumber:DEMO-&lt;comboDEMOJRN&gt;-* | collection:ATLANTISTIMESNEWS

                 As a note, you can test your pattern, using the
                 search engine and see if it retrieves the expected
                 results.

                 WARNING: this check is not applied if the report
                 number has already been written to 'edsrn' file.

    Exceptions raised:
        + InvenioWebSubmitFunctionStop
              - if trying to access unauthorized path to read/write report number;
              - if accessing a recid that does not exist or is deleted;
              - if recid should not be handled by the current submission;
    """
    global rn, sysno
    if not sysno:
        return

    try:
        sysno = int(sysno)
    except (TypeError, ValueError):
        raise InvenioWebSubmitFunctionStop(CFG_ALERT_RECORD_ID_MUST_BE_INT % \
                                           cgi.escape(str(sysno)))

    edsrn = parameters['edsrn']
    path_to_repnum_file = os.path.join(curdir, edsrn)

    if not os.path.abspath(path_to_repnum_file).startswith(curdir):
        # Trying to access invalid path...
        raise InvenioWebSubmitFunctionStop(CFG_ALERT_INVALID_EDSRN_PATH % \
                                           (cgi.escape(path_to_repnum_file),
                                            cgi.escape(CFG_SITE_SUPPORT_EMAIL)))

    if os.path.exists(path_to_repnum_file):
        # Have we already written RN to disk? If so, read from there
        possible_rn = ParamFromFile(path_to_repnum_file)
        if possible_rn.strip():
            # Not empty
            rn = possible_rn
            return

    if record_exists(sysno) != 1:
        # Record does not exist
        raise InvenioWebSubmitFunctionStop(CFG_ALERT_DOCUMENT_NOT_FOUND % sysno)

    ## Check if the record needs to comply to any restriction.
    ## Basically checks if this record can/should be handled by this submission
    if parameters['record_search_pattern']:
        if not is_record_matching_pattern(parameters['record_search_pattern'], sysno, curdir):
            # delete the SN file and reset the sysno,
            # because this record is not the right one to be handled by this submission
            os.rename("%s/SN" % curdir, "%s/SN_WRONG" % curdir)
            sysno = ""
            raise InvenioWebSubmitFunctionStop(CFG_ALERT_WRONG_RECORD_FOR_THIS_SUBMISSION)

    rn_tags = [tag.strip() for tag in parameters['rep_tags'].split(',') \
               if tag.strip()]
    if not rn_tags:
        rn_tags = CFG_DEFAULT_RN_TAGS

    # Retrieve report number in metadata
    for rn_tag in rn_tags:
        possible_report_numbers = get_fieldvalues(sysno, rn_tag)
        if possible_report_numbers:
            rn = possible_report_numbers[0].strip()
            break

    edsrn = parameters['edsrn']
    path_to_repnum_file = os.path.join(curdir, edsrn)

    if rn and not os.path.exists(path_to_repnum_file):
        # Write report number to specified file
        fp = open(path_to_repnum_file, 'w')
        fp.write(rn)
        fp.close()