コード例 #1
0
ファイル: engine.py プロジェクト: pombredanne/invenio-3
def _detect_collections_from_marcxml_file(recs):
    """
    Extract all possible recIDs from MARCXML file and guess collections
    for these recIDs.
    """
    from invenio.legacy.bibrecord import record_get_field_values
    from invenio.legacy.search_engine import guess_collection_of_a_record
    from invenio.legacy.bibupload.engine import find_record_from_sysno, \
                                  find_records_from_extoaiid, \
                                  find_record_from_oaiid

    dbcollids = {}
    sysno_tag = CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG
    oaiid_tag = CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG
    oai_tag = CFG_OAI_ID_FIELD
    for rec, dummy1, dummy2 in recs:
        if rec:
            for tag001 in record_get_field_values(rec, '001'):
                collection = guess_collection_of_a_record(int(tag001))
                dbcollids[collection] = 1
            for tag_sysno in record_get_field_values(rec,
                                                     tag=sysno_tag[:3],
                                                     ind1=sysno_tag[3],
                                                     ind2=sysno_tag[4],
                                                     code=sysno_tag[5]):
                record = find_record_from_sysno(tag_sysno)
                if record:
                    collection = guess_collection_of_a_record(int(record))
                    dbcollids[collection] = 1
            for tag_oaiid in record_get_field_values(rec,
                                                     tag=oaiid_tag[:3],
                                                     ind1=oaiid_tag[3],
                                                     ind2=oaiid_tag[4],
                                                     code=oaiid_tag[5]):
                try:
                    records = find_records_from_extoaiid(tag_oaiid)
                except Error:
                    records = []
                if records:
                    record = records.pop()
                    collection = guess_collection_of_a_record(int(record))
                    dbcollids[collection] = 1
            for tag_oai in record_get_field_values(rec,
                                                   tag=oai_tag[0:3],
                                                   ind1=oai_tag[3],
                                                   ind2=oai_tag[4],
                                                   code=oai_tag[5]):
                record = find_record_from_oaiid(tag_oai)
                if record:
                    collection = guess_collection_of_a_record(int(record))
                    dbcollids[collection] = 1
    return dbcollids.keys()
コード例 #2
0
ファイル: engine.py プロジェクト: ffelsner/invenio
def _detect_collections_from_marcxml_file(recs):
    """
    Extract all possible recIDs from MARCXML file and guess collections
    for these recIDs.
    """
    from invenio.legacy.bibrecord import record_get_field_values
    from invenio.legacy.search_engine import guess_collection_of_a_record
    from invenio.legacy.bibupload.engine import find_record_from_sysno, \
                                  find_records_from_extoaiid, \
                                  find_record_from_oaiid

    dbcollids = {}
    sysno_tag = CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG
    oaiid_tag = CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG
    oai_tag = CFG_OAI_ID_FIELD
    for rec, dummy1, dummy2 in recs:
        if rec:
            for tag001 in record_get_field_values(rec, '001'):
                collection = guess_collection_of_a_record(int(tag001))
                dbcollids[collection] = 1
            for tag_sysno in record_get_field_values(rec, tag=sysno_tag[:3],
                                                     ind1=sysno_tag[3],
                                                     ind2=sysno_tag[4],
                                                     code=sysno_tag[5]):
                record = find_record_from_sysno(tag_sysno)
                if record:
                    collection = guess_collection_of_a_record(int(record))
                    dbcollids[collection] = 1
            for tag_oaiid in record_get_field_values(rec, tag=oaiid_tag[:3],
                                                     ind1=oaiid_tag[3],
                                                     ind2=oaiid_tag[4],
                                                     code=oaiid_tag[5]):
                try:
                    records = find_records_from_extoaiid(tag_oaiid)
                except Error:
                    records = []
                if records:
                    record = records.pop()
                    collection = guess_collection_of_a_record(int(record))
                    dbcollids[collection] = 1
            for tag_oai in record_get_field_values(rec, tag=oai_tag[0:3],
                                                   ind1=oai_tag[3],
                                                   ind2=oai_tag[4],
                                                   code=oai_tag[5]):
                record = find_record_from_oaiid(tag_oai)
                if record:
                    collection = guess_collection_of_a_record(int(record))
                    dbcollids[collection] = 1
    return dbcollids.keys()
コード例 #3
0
ファイル: engine.py プロジェクト: ffelsner/invenio
def document_upload(req=None, folder="", matching="", mode="", exec_date="", exec_time="", ln=CFG_SITE_LANG, priority="1", email_logs_to=None):
    """ Take files from the given directory and upload them with the appropiate mode.
    @parameters:
        + folder: Folder where the files to upload are stored
        + matching: How to match file names with record fields (report number, barcode,...)
        + mode: Upload mode (append, revise, replace)
    @return: tuple (file, error code)
        file: file name causing the error to notify the user
        error code:
            1 - More than one possible recID, ambiguous behaviour
            2 - No records match that file name
            3 - File already exists
    """
    import sys
    from invenio.legacy.bibdocfile.api import BibRecDocs, file_strip_ext
    from invenio.utils.hash import md5
    import shutil
    from invenio.legacy.search_engine import perform_request_search, \
                                      search_pattern, \
                                      guess_collection_of_a_record
    _ = gettext_set_language(ln)
    errors = []
    info = [0, []] # Number of files read, name of the files
    try:
        files = os.listdir(folder)
    except OSError as error:
        errors.append(("", error))
        return errors, info
    err_desc = {1: _("More than one possible recID, ambiguous behaviour"), 2: _("No records match that file name"),
                3: _("File already exists"), 4: _("A file with the same name and format already exists")}
    # Create directory DONE/ if doesn't exist
    folder = (folder[-1] == "/") and folder or (folder + "/")
    files_done_dir = folder + "DONE/"
    try:
        os.mkdir(files_done_dir)
    except OSError:
        # Directory exists or no write permission
        pass
    for docfile in files:
        if os.path.isfile(os.path.join(folder, docfile)):
            info[0] += 1
            identifier = file_strip_ext(docfile)
            extension = docfile[len(identifier):]
            rec_id = None
            if identifier:
                rec_id = search_pattern(p=identifier, f=matching, m='e')
            if not rec_id:
                errors.append((docfile, err_desc[2]))
                continue
            elif len(rec_id) > 1:
                errors.append((docfile, err_desc[1]))
                continue
            else:
                rec_id = str(list(rec_id)[0])
            rec_info = BibRecDocs(rec_id)
            if rec_info.bibdocs:
                for bibdoc in rec_info.bibdocs:
                    attached_files = bibdoc.list_all_files()
                    file_md5 = md5(open(os.path.join(folder, docfile), "rb").read()).hexdigest()
                    num_errors = len(errors)
                    for attached_file in attached_files:
                        if attached_file.checksum == file_md5:
                            errors.append((docfile, err_desc[3]))
                            break
                        elif attached_file.get_full_name() == docfile:
                            errors.append((docfile, err_desc[4]))
                            break
                if len(errors) > num_errors:
                    continue
            # Check if user has rights to upload file
            if req is not None:
                file_collection = guess_collection_of_a_record(int(rec_id))
                auth_code, auth_message = acc_authorize_action(req, 'runbatchuploader', collection=file_collection)
                if auth_code != 0:
                    error_msg = _("No rights to upload to collection '%(x_name)s'", x_name=file_collection)
                    errors.append((docfile, error_msg))
                    continue
            # Move document to be uploaded to temporary folder
            (fd, tmp_file) = tempfile.mkstemp(prefix=identifier + "_" + time.strftime("%Y%m%d%H%M%S", time.localtime()) + "_", suffix=extension, dir=CFG_TMPSHAREDDIR)
            shutil.copy(os.path.join(folder, docfile), tmp_file)
            # Create MARC temporary file with FFT tag and call bibupload
            (fd, filename) = tempfile.mkstemp(prefix=identifier + '_', dir=CFG_TMPSHAREDDIR)
            filedesc = os.fdopen(fd, 'w')
            marc_content = """ <record>
                                    <controlfield tag="001">%(rec_id)s</controlfield>
                                        <datafield tag="FFT" ind1=" " ind2=" ">
                                            <subfield code="n">%(name)s</subfield>
                                            <subfield code="a">%(path)s</subfield>
                                        </datafield>
                               </record> """ % {'rec_id': rec_id,
                                                'name': encode_for_xml(identifier),
                                                'path': encode_for_xml(tmp_file),
                                                }
            filedesc.write(marc_content)
            filedesc.close()
            info[1].append(docfile)
            user = ""
            if req is not None:
                user_info = collect_user_info(req)
                user = user_info['nickname']
            if not user:
                user = "******"
            # Execute bibupload with the appropiate mode

            task_arguments = ('bibupload', user, "--" + mode,
                              "--priority=" + priority, "-N", "batchupload")

            if exec_date:
                date = '--runtime=' + "\'" + exec_date + ' ' + exec_time + "\'"
                task_arguments += (date, )
            if email_logs_to:
                task_arguments += ("--email-logs-to", email_logs_to)
            task_arguments += (filename, )

            jobid = task_low_level_submission(*task_arguments)

            # write batch upload history
            run_sql("""INSERT INTO hstBATCHUPLOAD (user, submitdate,
                    filename, execdate, id_schTASK, batch_mode)
                    VALUES (%s, NOW(), %s, %s, %s, "document")""",
                    (user_info['nickname'], docfile,
                    exec_date != "" and (exec_date + ' ' + exec_time)
                    or time.strftime("%Y-%m-%d %H:%M:%S"), str(jobid)))

            # Move file to DONE folder
            done_filename = docfile + "_" + time.strftime("%Y%m%d%H%M%S", time.localtime()) + "_" + str(jobid)
            try:
                os.rename(os.path.join(folder, docfile), os.path.join(files_done_dir, done_filename))
            except OSError:
                errors.append('MoveError')
    return errors, info
コード例 #4
0
ファイル: engine.py プロジェクト: pombredanne/invenio-3
def document_upload(req=None,
                    folder="",
                    matching="",
                    mode="",
                    exec_date="",
                    exec_time="",
                    ln=CFG_SITE_LANG,
                    priority="1",
                    email_logs_to=None):
    """ Take files from the given directory and upload them with the appropiate mode.
    @parameters:
        + folder: Folder where the files to upload are stored
        + matching: How to match file names with record fields (report number, barcode,...)
        + mode: Upload mode (append, revise, replace)
    @return: tuple (file, error code)
        file: file name causing the error to notify the user
        error code:
            1 - More than one possible recID, ambiguous behaviour
            2 - No records match that file name
            3 - File already exists
    """
    import sys
    from invenio.legacy.bibdocfile.api import BibRecDocs, file_strip_ext
    from invenio.utils.hash import md5
    import shutil
    from invenio.legacy.search_engine import perform_request_search, \
                                      search_pattern, \
                                      guess_collection_of_a_record
    _ = gettext_set_language(ln)
    errors = []
    info = [0, []]  # Number of files read, name of the files
    try:
        files = os.listdir(folder)
    except OSError as error:
        errors.append(("", error))
        return errors, info
    err_desc = {
        1: _("More than one possible recID, ambiguous behaviour"),
        2: _("No records match that file name"),
        3: _("File already exists"),
        4: _("A file with the same name and format already exists")
    }
    # Create directory DONE/ if doesn't exist
    folder = (folder[-1] == "/") and folder or (folder + "/")
    files_done_dir = folder + "DONE/"
    try:
        os.mkdir(files_done_dir)
    except OSError:
        # Directory exists or no write permission
        pass
    for docfile in files:
        if os.path.isfile(os.path.join(folder, docfile)):
            info[0] += 1
            identifier = file_strip_ext(docfile)
            extension = docfile[len(identifier):]
            rec_id = None
            if identifier:
                rec_id = search_pattern(p=identifier, f=matching, m='e')
            if not rec_id:
                errors.append((docfile, err_desc[2]))
                continue
            elif len(rec_id) > 1:
                errors.append((docfile, err_desc[1]))
                continue
            else:
                rec_id = str(list(rec_id)[0])
            rec_info = BibRecDocs(rec_id)
            if rec_info.bibdocs:
                for bibdoc in rec_info.bibdocs:
                    attached_files = bibdoc.list_all_files()
                    file_md5 = md5(
                        open(os.path.join(folder, docfile),
                             "rb").read()).hexdigest()
                    num_errors = len(errors)
                    for attached_file in attached_files:
                        if attached_file.checksum == file_md5:
                            errors.append((docfile, err_desc[3]))
                            break
                        elif attached_file.get_full_name() == docfile:
                            errors.append((docfile, err_desc[4]))
                            break
                if len(errors) > num_errors:
                    continue
            # Check if user has rights to upload file
            if req is not None:
                file_collection = guess_collection_of_a_record(int(rec_id))
                auth_code, auth_message = acc_authorize_action(
                    req, 'runbatchuploader', collection=file_collection)
                if auth_code != 0:
                    error_msg = _(
                        "No rights to upload to collection '%(x_name)s'",
                        x_name=file_collection)
                    errors.append((docfile, error_msg))
                    continue
            # Move document to be uploaded to temporary folder
            (fd, tmp_file) = tempfile.mkstemp(
                prefix=identifier + "_" +
                time.strftime("%Y%m%d%H%M%S", time.localtime()) + "_",
                suffix=extension,
                dir=CFG_TMPSHAREDDIR)
            shutil.copy(os.path.join(folder, docfile), tmp_file)
            # Create MARC temporary file with FFT tag and call bibupload
            (fd, filename) = tempfile.mkstemp(prefix=identifier + '_',
                                              dir=CFG_TMPSHAREDDIR)
            filedesc = os.fdopen(fd, 'w')
            marc_content = """ <record>
                                    <controlfield tag="001">%(rec_id)s</controlfield>
                                        <datafield tag="FFT" ind1=" " ind2=" ">
                                            <subfield code="n">%(name)s</subfield>
                                            <subfield code="a">%(path)s</subfield>
                                        </datafield>
                               </record> """ % {
                'rec_id': rec_id,
                'name': encode_for_xml(identifier),
                'path': encode_for_xml(tmp_file),
            }
            filedesc.write(marc_content)
            filedesc.close()
            info[1].append(docfile)
            user = ""
            if req is not None:
                user_info = collect_user_info(req)
                user = user_info['nickname']
            if not user:
                user = "******"
            # Execute bibupload with the appropiate mode

            task_arguments = ('bibupload', user, "--" + mode,
                              "--priority=" + priority, "-N", "batchupload")

            if exec_date:
                date = '--runtime=' + "\'" + exec_date + ' ' + exec_time + "\'"
                task_arguments += (date, )
            if email_logs_to:
                task_arguments += ("--email-logs-to", email_logs_to)
            task_arguments += (filename, )

            jobid = task_low_level_submission(*task_arguments)

            # write batch upload history
            run_sql(
                """INSERT INTO hstBATCHUPLOAD (user, submitdate,
                    filename, execdate, id_schTASK, batch_mode)
                    VALUES (%s, NOW(), %s, %s, %s, "document")""",
                (user_info['nickname'], docfile, exec_date != "" and
                 (exec_date + ' ' + exec_time)
                 or time.strftime("%Y-%m-%d %H:%M:%S"), str(jobid)))

            # Move file to DONE folder
            done_filename = docfile + "_" + time.strftime(
                "%Y%m%d%H%M%S", time.localtime()) + "_" + str(jobid)
            try:
                os.rename(os.path.join(folder, docfile),
                          os.path.join(files_done_dir, done_filename))
            except OSError:
                errors.append('MoveError')
    return errors, info