Python decompose_file Examples, invenio.legacy.bibdocfile.api.decompose_file Python Examples

Example #1

0

Show file

File: Move_Files_Archive.py Project: chokribr/invenio-1

def Move_Files_Archive(parameters, curdir, form, user_info=None):
    """DEPRECATED: Use FFT instead."""
    MainDir = "%s/files/MainFiles" % curdir
    IncludeDir = "%s/files/AdditionalFiles" % curdir
    watcheddirs = {'Main' : MainDir, 'Additional' : IncludeDir}
    for type, dir in iteritems(watcheddirs):
        if os.path.exists(dir):
            formats = {}
            files = os.listdir(dir)
            files.sort()
            for file in files:
                dummy, filename, extension = decompose_file(file)
                if filename not in formats:
                    formats[filename] = []
                formats[filename].append(normalize_format(extension))
            # first delete all missing files
            bibarchive = BibRecDocs(sysno)
            existingBibdocs = bibarchive.list_bibdocs(type)
            for existingBibdoc in existingBibdocs:
                if bibarchive.get_docname(existingBibdoc.id) not in formats:
                    existingBibdoc.delete()
            # then create/update the new ones
            for key in formats.keys():
                # instanciate bibdoc object
                bibarchive.add_new_file('%s/%s%s' % (dir, key, formats[key]), doctype=type, never_fail=True)
    return ""

Example #2

0

Show file

File: pyexiv2_plugin.py Project: chokribr/invenio-1

def can_read_remote(inputfile):
    """Checks if inputfile is among metadata-readable
    file types
    @param inputfile: (string) path to the image
    @type inputfile: string
    @rtype: boolean
    @return: true if extension casn be handled"""

    # Check file type (0 base, 1 name, 2 ext)
    ext = decompose_file(inputfile)[2]
    return ext.lower() in ['.jpg', '.jpeg', 'jpe', '.jfif', '.jfi', '.jif']

Example #3

0

Show file

File: Add_Files.py Project: SCOAP3/invenio

def Add_Files(parameters, curdir, form, user_info=None):
    """DEPRECATED: Use FFT instead."""
    if os.path.exists("%s/files" % curdir):
        bibrecdocs = BibRecDocs(sysno)
        for current_file in os.listdir("%s/files" % curdir):
            fullpath = "%s/files/%s" % (curdir,current_file)
            dummy, filename, extension = decompose_file(current_file)
            if extension and extension[0] != ".":
                extension = '.' + extension
            if not bibrecdocs.check_file_exists(fullpath, extension):
                bibrecdocs.add_new_file(fullpath, "Main", never_fail=True)
    return ""

Example #4

0

Show file

def Add_Files(parameters, curdir, form, user_info=None):
    """DEPRECATED: Use FFT instead."""
    if os.path.exists("%s/files" % curdir):
        bibrecdocs = BibRecDocs(sysno)
        for current_file in os.listdir("%s/files" % curdir):
            fullpath = "%s/files/%s" % (curdir, current_file)
            dummy, filename, extension = decompose_file(current_file)
            if extension and extension[0] != ".":
                extension = '.' + extension
            if not bibrecdocs.check_file_exists(fullpath, extension):
                bibrecdocs.add_new_file(fullpath, "Main", never_fail=True)
    return ""

Example #5

0

Show file

File: pyexiv2_plugin.py Project: mhellmic/b2share

def can_read_remote(inputfile):
    """Checks if inputfile is among metadata-readable
    file types
    @param inputfile: (string) path to the image
    @type inputfile: string
    @rtype: boolean
    @return: true if extension casn be handled"""

    # Check file type (0 base, 1 name, 2 ext)
    ext = decompose_file(inputfile)[2]
    return ext.lower() in ['.jpg', '.jpeg', 'jpe',
                           '.jfif', '.jfi', '.jif']

Example #6

0

Show file

File: pdftk_plugin.py Project: mhellmic/b2share

def can_read_local(inputfile):
    """
    Checks if inputfile is among metadata-readable file types

    @param inputfile: path to the image
    @type inputfile: string
    @rtype: boolean
    @return: True if file can be processed
    """

    # Check file type (0 base, 1 name, 2 ext)
    ext = decompose_file(inputfile)[2]
    return ext.lower() in ['.pdf']

Example #7

0

Show file

File: pyexiv2_plugin.py Project: mhellmic/b2share

def can_write_local(inputfile):
    """
    Checks if inputfile is among metadata-writable file types

    @param inputfile: path to the image
    @type inputfile: string
    @rtype: boolean
    @return: True if file can be processed
    """
    # Check file type (0 base, 1 name, 2 ext)
    ext = decompose_file(inputfile)[2]
    return ext.lower() in ['.jpg', '.tiff', '.jpeg', 'jpe',
                           '.jfif', '.jfi', '.jif']

Example #8

0

Show file

def can_read_local(inputfile):
    """
    Checks if inputfile is among metadata-readable file types

    @param inputfile: path to the image
    @type inputfile: string
    @rtype: boolean
    @return: True if file can be processed
    """

    # Check file type (0 base, 1 name, 2 ext)
    ext = decompose_file(inputfile)[2]
    return ext.lower() in ['.pdf']

Example #9

0

Show file

File: pyexiv2_plugin.py Project: chokribr/invenio-1

def can_write_local(inputfile):
    """
    Checks if inputfile is among metadata-writable file types

    @param inputfile: path to the image
    @type inputfile: string
    @rtype: boolean
    @return: True if file can be processed
    """
    # Check file type (0 base, 1 name, 2 ext)
    ext = decompose_file(inputfile)[2]
    return ext.lower() in [
        '.jpg', '.tiff', '.jpeg', 'jpe', '.jfif', '.jfi', '.jif'
    ]

Example #10

0

Show file

File: utils.py Project: chokribr/invenio-1

def get_record_documents(recid, filename):
    """Yield LegacyBibDoc files from Documents."""
    from invenio.modules.records.api import get_record
    from invenio.modules.documents.api import Document
    from invenio.legacy.bibdocfile.api import decompose_file

    record = get_record(recid)
    duuids = [
        uuid for (k, uuid) in record.get('_documents', []) if k == filename
    ]

    for duuid in duuids:
        document = Document.get_document(duuid)
        if not document.is_authorized(current_user):
            current_app.logger.info(
                "Unauthorized access to /{recid}/files/{filename} "
                "({document}) by {current_user}".format(
                    recid=recid,
                    filename=filename,
                    document=document,
                    current_user=current_user))
            continue

        if document.get(
                'linked',
                False) and (document.get('uri').startswith('http://')
                            or document.get('uri').startswith('https://')):
            url = document.get('uri')
        else:
            url = url_for('record.file', recid=recid, filename=filename)

        (dummy, name, superformat) = decompose_file(filename)

        class LegacyBibDoc(object):
            def __init__(self, **kwargs):
                for key, value in kwargs.items():
                    setattr(self, key, value)

            def get_full_path(self):
                return document.get('uri')

            def get_recid(self):
                return recid

        yield LegacyBibDoc(name=name, superformat=superformat, url=url)

Example #11

0

Show file

File: extractor_plugin.py Project: chokribr/invenio-1

def can_read_local(inputfile):
    """
    Checks if inputfile is among metadata-readable file types

    @param inputfile: path to the image
    @type inputfile: string
    @rtype: boolean
    @return: True if file can be processed
    """

    # Check file type (0 base, 1 name, 2 ext)
    ext = decompose_file(inputfile)[2]
    return ext.lower() in [
        '.html', '.doc', '.ps', '.xls', '.ppt', '.ps', '.sxw', '.sdw', '.dvi',
        '.man', '.flac', '.mp3', '.nsf', '.sid', '.ogg', '.wav', '.png',
        '.deb', '.rpm', '.tar.gz', '.zip', '.elf', '.s3m', '.xm', '.it',
        '.flv', '.real', '.avi', '.mpeg', '.qt', '.asf'
    ]

Example #12

0

Show file

File: extractor_plugin.py Project: SCOAP3/invenio

def can_read_local(inputfile):
    """
    Checks if inputfile is among metadata-readable file types

    @param inputfile: path to the image
    @type inputfile: string
    @rtype: boolean
    @return: True if file can be processed
    """

    # Check file type (0 base, 1 name, 2 ext)
    ext = decompose_file(inputfile)[2]
    return ext.lower() in ['.html', '.doc', '.ps', '.xls', '.ppt',
                           '.ps', '.sxw', '.sdw', '.dvi', '.man', '.flac',
                           '.mp3', '.nsf', '.sid', '.ogg', '.wav', '.png',
                           '.deb', '.rpm', '.tar.gz', '.zip', '.elf',
                           '.s3m', '.xm', '.it', '.flv', '.real', '.avi',
                           '.mpeg', '.qt', '.asf']

Example #13

0

Show file

File: file_metadata.py Project: chokribr/invenio-1

def write_metadata(inputfile,
                   outputfile,
                   metadata_dictionary,
                   force=None,
                   verbose=0):
    """Write metadata to given file.

    Availability depends on input file format and installed plugins
    (return C{TypeError} if unsupported file format).

    @param inputfile: path to a file
    @type inputfile: string
    @param outputfile: path to the resulting file.
    @type outputfile: string
    @param verbose: verbosity
    @type verbose: int
    @param metadata_dictionary: keys and values of metadata to update.
    @type metadata_dictionary: dict
    @param force: name of plugin to use, to skip plugin auto-discovery
    @type force: string
    @return: output of the plugin
    @rtype: string
    @raise TypeError: if file format is not supported.
    @raise RuntimeError: if required library to process file is missing.
    @raise InvenioWebSubmitFileMetadataRuntimeError:
    when metadata cannot be updated.
    """
    # Check file type (0 base, 1 name, 2 ext)
    ext = decompose_file(inputfile)[2]
    if verbose > 5:
        print(ext.lower(), 'extension to write to')

    # Loop through the plugins to find a good one to ext
    for plugin_name, plugin in iteritems(metadata_extractor_plugins):
        if 'can_write_local' in plugin and \
            plugin['can_write_local'](inputfile) and \
                (not force or plugin_name == force):
            if verbose > 5:
                print('Using ' + plugin_name)
            return plugin['write_metadata_local'](inputfile, outputfile,
                                                  metadata_dictionary, verbose)

    # Case of no plugin found, raise
    raise TypeError('Unsupported file type')

Example #14

0

Show file

File: Shared_Functions.py Project: mhellmic/b2share

def createRelatedFormats(fullpath, overwrite=True, debug=False):
    """Given a fullpath, this function extracts the file's extension and
    finds in which additional format the file can be converted and converts it.
    @param fullpath: (string) complete path to file
    @param overwrite: (bool) overwrite already existing formats
    Return a list of the paths to the converted files
    """
    file_converter_logger = get_file_converter_logger()
    old_logging_level = file_converter_logger.getEffectiveLevel()
    if debug:
        file_converter_logger.setLevel(DEBUG)
    try:
        createdpaths = []
        basedir, filename, extension = decompose_file(fullpath)
        extension = extension.lower()
        if debug:
            print("basedir: %s, filename: %s, extension: %s" % (basedir, filename, extension), file=sys.stderr)

        filelist = glob.glob(os.path.join(basedir, '%s*' % filename))
        if debug:
            print("filelist: %s" % filelist, file=sys.stderr)
        missing_formats = get_missing_formats(filelist)
        if debug:
            print("missing_formats: %s" % missing_formats, file=sys.stderr)
        for path, formats in iteritems(missing_formats):
            if debug:
                print("... path: %s, formats: %s" % (path, formats), file=sys.stderr)
            for aformat in formats:
                if debug:
                    print("...... aformat: %s" % aformat, file=sys.stderr)
                newpath = os.path.join(basedir, filename + aformat)
                if debug:
                    print("...... newpath: %s" % newpath, file=sys.stderr)
                try:
                    convert_file(path, newpath)
                    createdpaths.append(newpath)
                except InvenioWebSubmitFileConverterError as msg:
                    if debug:
                        print("...... Exception: %s" % msg, file=sys.stderr)
                    register_exception(alert_admin=True)
    finally:
        if debug:
            file_converter_logger.setLevel(old_logging_level)
    return createdpaths

Example #15

0

Show file

File: file_metadata.py Project: mhellmic/b2share

def write_metadata(inputfile, outputfile, metadata_dictionary,
                   force=None, verbose=0):
    """
    Writes metadata to given file.

    Availability depends on input file format and installed plugins
    (return C{TypeError} if unsupported file format).

    @param inputfile: path to a file
    @type inputfile: string
    @param outputfile: path to the resulting file.
    @type outputfile: string
    @param verbose: verbosity
    @type verbose: int
    @param metadata_dictionary: keys and values of metadata to update.
    @type metadata_dictionary: dict
    @param force: name of plugin to use, to skip plugin auto-discovery
    @type force: string
    @return: output of the plugin
    @rtype: string
    @raise TypeError: if file format is not supported.
    @raise RuntimeError: if required library to process file is missing.
    @raise InvenioWebSubmitFileMetadataRuntimeError: when metadata cannot be updated.
    """
    # Check file type (0 base, 1 name, 2 ext)
    ext = decompose_file(inputfile)[2]
    if verbose > 5:
        print(ext.lower(), 'extension to write to')

    # Loop through the plugins to find a good one to ext
    for plugin_name, plugin in iteritems(metadata_extractor_plugins):
        if 'can_write_local' in plugin and \
            plugin['can_write_local'](inputfile) and \
            (not force or plugin_name == force):
            if verbose > 5:
                print('Using ' + plugin_name)
            return plugin['write_metadata_local'](inputfile,
                                                  outputfile,
                                                  metadata_dictionary,
                                                  verbose)

    # Case of no plugin found, raise
    raise TypeError, 'Unsupported file type'

Example #16

0

Show file

File: utils.py Project: jirikuncar/invenio-previewer

def get_record_documents(recid, filename):
    """Yield LegacyBibDoc files from Documents."""
    from invenio_records.api import get_record
    from invenio_documents.api import Document
    from invenio.legacy.bibdocfile.api import decompose_file

    record = get_record(recid)
    duuids = [uuid for (k, uuid) in record.get('_documents', [])
              if k == filename]

    for duuid in duuids:
        document = Document.get_document(duuid)
        if not document.is_authorized(current_user):
            current_app.logger.info(
                "Unauthorized access to /{recid}/files/{filename} "
                "({document}) by {current_user}".format(
                    recid=recid, filename=filename, document=document,
                    current_user=current_user))
            continue

        if document.get('linked', False) and (
                document.get('uri').startswith('http://') or
                document.get('uri').startswith('https://')):
            url = document.get('uri')
        else:
            url = url_for('record.file', recid=recid, filename=filename)

        (dummy, name, superformat) = decompose_file(filename)

        class LegacyBibDoc(object):

            def __init__(self, **kwargs):
                for key, value in kwargs.items():
                    setattr(self, key, value)

            def get_full_path(self):
                return document.get('uri')

            def get_recid(self):
                return recid

        yield LegacyBibDoc(name=name, superformat=superformat, url=url)

Example #17

0

Show file

File: Move_CKEditor_Files_to_Storage.py Project: chokribr/invenio-1

        def translate_link(match_obj):
            """Replace CKEditor link by 'local' record link. Also
            create the FFT for that link"""
            file_type = match_obj.group('type')
            file_name = match_obj.group('filename')
            uid = match_obj.group('uid')
            dummy, name, extension = decompose_file(file_name)
            new_url = build_url(sysno, name, file_type, extension)
            original_location = match_obj.group()[1:-1]
            icon_location = original_location
            # Prepare FFT that will fetch the file (+ the original
            # file in the case of images)
            if file_type == 'image':
                # Does original file exists, or do we just have the
                # icon? We expect the original file at a well defined
                # location
                possible_original_path = os.path.join(CFG_PREFIX,
                                                      'var', 'tmp',
                                                      'attachfile',
                                                      uid,
                                                      file_type,
                                                      'original',
                                                      file_name)
                if os.path.exists(possible_original_path):
                    icon_location = original_location
                    original_location = possible_original_path
                    new_url = build_url(sysno, name,
                                        file_type, extension, is_icon=True)

            docname = build_docname(name, file_type, extension)
            if original_location not in processed_paths:
                # Must create an FFT only if we have not yet processed
                # the file. This can happen if same image exists on
                # the same page (either in two different CKEditor
                # instances, or twice in the HTML)
                processed_paths.append(original_location)
                write_fft(curdir,
                          original_location,
                          docname,
                          icon_location,
                          doctype=file_type)
            return '"' + new_url + '"'

Example #18

0

Show file

File: bfe_webjournal_articles_overview.py Project: mhellmic/b2share

def _get_feature_image(record, ln=CFG_SITE_LANG):
    """
    Looks for an image that can be featured on the article overview page.
    """
    src = ''
    if ln == "fr":
        article = ''.join(record.fields('590__b'))
        if not article:
            article = ''.join(record.fields('520__b'))
    else:
        article = ''.join(record.fields('520__b'))
        if not article:
            article = ''.join(record.fields('590__b'))

    image = re.search(img_pattern, article)
    if image:
        src = image.group("image")
    if not src:
        # Look for an attached image
        icons = [icon for icon in record.fields('8564_q') if \
                (decompose_file(icon)[2] in ['jpg', 'jpeg', 'png', 'gif'])]
        if icons:
            src = icons[0]
    return src

Example #19

0

Show file

File: bfe_webjournal_articles_overview.py Project: chokribr/invenio-1

def _get_feature_image(record, ln=CFG_SITE_LANG):
    """
    Looks for an image that can be featured on the article overview page.
    """
    src = ''
    if ln == "fr":
        article = ''.join(record.fields('590__b'))
        if not article:
            article = ''.join(record.fields('520__b'))
    else:
        article = ''.join(record.fields('520__b'))
        if not article:
            article = ''.join(record.fields('590__b'))

    image = re.search(img_pattern, article)
    if image:
        src = image.group("image")
    if not src:
        # Look for an attached image
        icons = [icon for icon in record.fields('8564_q') if \
                (decompose_file(icon)[2] in ['jpg', 'jpeg', 'png', 'gif'])]
        if icons:
            src = icons[0]
    return src

Example #20

0

Show file

def process_CKEditor_upload(form,
                            uid,
                            user_files_path,
                            user_files_absolute_path,
                            recid=None,
                            allowed_types=default_allowed_types):
    """
    Process a file upload request.

    @param form: the form as in req object.
    @type form: dict
    @param uid: the user ID of the user uploading the file.
    @type uid: int
    @param user_files_path: the base URL where the file can be
        accessed from the web after upload.
        Note that you have to implement your own handler to stream the files from the directory
        C{user_files_absolute_path} if you set this value.
    @type user_files_path: string
    @param user_files_absolute_path: the base path on the server where
        the files should be saved.
        Eg:C{%(CFG_DATADIR)s/comments/%(recid)s/%(uid)s}
    @type user_files_absolute_path: string
    @param recid: the record ID for which we upload a file. Leave None if not relevant.
    @type recid: int
    @param allowed_types: types allowed for uploading. These
        are supported by CKEditor: ['File', 'Image', 'Flash', 'Media']
    @type allowed_types: list of strings
    @return: (msg, uploaded_file_path, uploaded_file_name, uploaded_file_url, callback_function)
    """
    msg = ''
    filename = ''
    formfile = None
    uploaded_file_path = ''
    user_files_path = ''

    for key, formfields in form.items():
        if key != 'upload':
            continue
        if hasattr(formfields, "filename") and formfields.filename:
            # We have found our file
            filename = formfields.filename
            formfile = formfields.file
            break

    can_upload_file_p = False
    if not form['type'] in allowed_types:
        # Is the type sent through the form ok?
        msg = 'You are not allowed to upload a file of this type'
    else:
        # Is user allowed to upload such file extension?
        basedir, name, extension = decompose_file(filename)
        extension = extension[1:]  # strip leading dot
        if extension in allowed_extensions.get(form['type'], []):
            can_upload_file_p = True

    if not can_upload_file_p:
        msg = 'You are not allowed to upload a file of this type'
    elif filename and formfile:
        ## Before saving the file to disk, wash the filename (in particular
        ## washing away UNIX and Windows (e.g. DFS) paths):
        filename = os.path.basename(filename.split('\\')[-1])
        # Remove \ / | : ? *
        filename = re.sub(
            '\\\\|\\/|\\||\\:|\\?|\\*|"|<|>|[\x00-\x1f\x7f-\x9f]/', '_',
            filename)
        filename = filename.strip()
        if filename != "":
            # Check that file does not already exist
            n = 1
            while os.path.exists(
                    os.path.join(user_files_absolute_path, filename)):
                basedir, name, extension = decompose_file(filename)
                new_name = propose_next_docname(name)
                filename = new_name + extension

            # This may be dangerous if the file size is bigger than the available memory
            fp = open(os.path.join(user_files_absolute_path, filename), "w")
            fp.write(formfile.read())
            fp.close()

            uploaded_file_path = os.path.join(user_files_absolute_path,
                                              filename)
            uploaded_file_name = filename

    return (msg, uploaded_file_path, filename, user_files_path,
            form['CKEditorFuncNum'])

Example #21

0

Show file

File: batch_engine.py Project: jaags/invenio

def process_batch_job(batch_job_file):
    """ Processes a batch job description dictionary

    @param batch_job_file: a fullpath to a batch job file
    @type batch_job_file: string
    @return: 1 if the process was successful, 0 if not
    @rtype; int
    """
    from invenio.legacy.bibdocfile.cli import cli_fix_marc

    def upload_marcxml_file(marcxml):
        """ Creates a temporary marcxml file and sends it to bibupload
        """
        xml_filename = 'bibencode_'+ str(batch_job['recid']) + '_' + str(uuid.uuid4()) + '.xml'
        xml_filename = os.path.join(invenio.config.CFG_TMPSHAREDDIR, xml_filename)
        xml_file = file(xml_filename, 'w')
        xml_file.write(marcxml)
        xml_file.close()
        targs = ['-c', xml_filename]
        task_low_level_submission('bibupload', 'bibencode', *targs)

    #---------#
    # GENERAL #
    #---------#

    _task_write_message("----------- Handling Master -----------")

    ## Check the validity of the batch file here
    batch_job = json_decode_file(batch_job_file)

    ## Sanitise batch description and raise errrors
    batch_job = sanitise_batch_job(batch_job)

    ## Check if the record exists
    # if record_exists(batch_job['recid']) < 1:
    #     raise Exception("Record not found")

    recdoc = BibRecDocs(batch_job['recid'])

    #--------------------#
    # UPDATE FROM MASTER #
    #--------------------#

    ## We want to add new stuff to the video's record, using the master as input
    if getval(batch_job, 'update_from_master'):
        found_master = False
        bibdocs = recdoc.list_bibdocs()
        for bibdoc in bibdocs:
            bibdocfiles = bibdoc.list_all_files()
            for bibdocfile in bibdocfiles:
                comment = bibdocfile.get_comment()
                description = bibdocfile.get_description()
                subformat = bibdocfile.get_subformat()
                m_comment = getval(batch_job, 'bibdoc_master_comment', comment)
                m_description = getval(batch_job, 'bibdoc_master_description', description)
                m_subformat = getval(batch_job, 'bibdoc_master_subformat', subformat)
                if (comment == m_comment and
                    description == m_description and
                    subformat == m_subformat):
                    found_master = True
                    batch_job['input'] = bibdocfile.get_full_path()
                    ## Get the aspect of the from the record
                    try:
                        ## Assumes pbcore metadata mapping
                        batch_job['aspect'] = get_fieldvalues(124, CFG_BIBENCODE_ASPECT_RATIO_MARC_FIELD)[0]
                    except IndexError:
                        pass
                    break
            if found_master:
                break
        if not found_master:
            _task_write_message("Video master for record %d not found"
                          % batch_job['recid'])
            task_update_progress("Video master for record %d not found"
                                 % batch_job['recid'])
            ## Maybe send an email?
            return 1

    ## Clean the job to do no upscaling etc
    if getval(batch_job, 'assure_quality'):
        batch_job = clean_job_for_quality(batch_job)

    global _BATCH_STEPS
    _BATCH_STEPS = len(batch_job['jobs'])

    ## Generate the docname from the input filename's name or given name
    bibdoc_video_docname, bibdoc_video_extension = decompose_file(batch_job['input'])[1:]
    if not bibdoc_video_extension or getval(batch_job, 'bibdoc_master_extension'):
        bibdoc_video_extension = getval(batch_job, 'bibdoc_master_extension')
    if getval(batch_job, 'bibdoc_master_docname'):
        bibdoc_video_docname = getval(batch_job, 'bibdoc_master_docname')

    write_message("Creating BibDoc for %s" % bibdoc_video_docname)
    ## If the bibdoc exists, receive it
    if bibdoc_video_docname in recdoc.get_bibdoc_names():
        bibdoc_video = recdoc.get_bibdoc(bibdoc_video_docname)
    ## Create a new bibdoc if it does not exist
    else:
        bibdoc_video = recdoc.add_bibdoc(docname=bibdoc_video_docname)

    ## Get the directory auf the newly created bibdoc to copy stuff there
    bibdoc_video_directory = bibdoc_video.get_base_dir()

    #--------#
    # MASTER #
    #--------#
    if not getval(batch_job, 'update_from_master'):
        if getval(batch_job, 'add_master'):
            ## Generate the right name for the master
            ## The master should be hidden first an then renamed
            ## when it is really available
            ## !!! FIX !!!
            _task_write_message("Adding %s master to the BibDoc"
                          % bibdoc_video_docname)
            master_format = compose_format(
                                    bibdoc_video_extension,
                                    getval(batch_job, 'bibdoc_master_subformat', 'master')
                                    )
            ## If a file of the same format is there, something is wrong, remove it!
            ## it might be caused by a previous corrupted submission etc.
            if bibdoc_video.format_already_exists_p(master_format):
                bibdoc_video.delete_file(master_format, 1)
            bibdoc_video.add_file_new_format(
                    batch_job['input'],
                    version=1,
                    description=getval(batch_job, 'bibdoc_master_description'),
                    comment=getval(batch_job, 'bibdoc_master_comment'),
                    docformat=master_format
                    )

    #-----------#
    # JOBS LOOP #
    #-----------#

    return_code = 1
    global _BATCH_STEP

    for job in batch_job['jobs']:

        _task_write_message("----------- Job %s of %s -----------"
                           % (_BATCH_STEP, _BATCH_STEPS))

        ## Try to substitute docname with master docname
        if getval(job, 'bibdoc_docname'):
            job['bibdoc_docname'] = Template(job['bibdoc_docname']).safe_substitute({'bibdoc_master_docname': bibdoc_video_docname})

        #-------------#
        # TRANSCODING #
        #-------------#

        if job['mode'] == 'encode':

            ## Skip the job if assure_quality is not set and marked as fallback
            if not getval(batch_job, 'assure_quality') and getval(job, 'fallback'):
                continue

            if getval(job, 'profile'):
                profile = get_encoding_profile(job['profile'])
            else:
                profile = None
            ## We need an extension defined fot the video container
            bibdoc_video_extension = getval(job, 'extension',
                                            getval(profile, 'extension'))
            if not bibdoc_video_extension:
                raise Exception("No container/extension defined")
            ## Get the docname and subformat
            bibdoc_video_subformat = getval(job, 'bibdoc_subformat')
            bibdoc_slave_video_docname = getval(job, 'bibdoc_docname', bibdoc_video_docname)
            ## The subformat is incompatible with ffmpegs name convention
            ## We do the encoding without and rename it afterwards
            bibdoc_video_fullpath = compose_file(
                                                 bibdoc_video_directory,
                                                 bibdoc_video_extension
                                                 )
            _task_write_message("Transcoding %s to %s;%s" % (bibdoc_slave_video_docname,
                                bibdoc_video_extension,
                                bibdoc_video_subformat))
            ## We encode now directly into the bibdocs directory
            encoding_result = encode_video(
                 input_file=batch_job['input'],
                 output_file=bibdoc_video_fullpath,
                 acodec=getval(job, 'audiocodec'),
                 vcodec=getval(job, 'videocodec'),
                 abitrate=getval(job, 'videobitrate'),
                 vbitrate=getval(job, 'audiobitrate'),
                 resolution=getval(job, 'resolution'),
                 passes=getval(job, 'passes', 1),
                 special=getval(job, 'special'),
                 specialfirst=getval(job, 'specialfirst'),
                 specialsecond=getval(job, 'specialsecond'),
                 metadata=getval(job, 'metadata'),
                 width=getval(job, 'width'),
                 height=getval(job, 'height'),
                 aspect=getval(batch_job, 'aspect'), # Aspect for every job
                 profile=getval(job, 'profile'),
                 update_fnc=_task_update_overall_status,
                 message_fnc=_task_write_message
                 )
            return_code &= encoding_result
            ## only on success
            if  encoding_result:
                ## Rename it, adding the subformat
                os.rename(bibdoc_video_fullpath,
                          compose_file(bibdoc_video_directory,
                                       bibdoc_video_extension,
                                       bibdoc_video_subformat,
                                       1,
                                       bibdoc_slave_video_docname)
                          )
                #bibdoc_video._build_file_list()
                bibdoc_video.touch()
                bibdoc_video._sync_to_db()
                bibdoc_video_format = compose_format(bibdoc_video_extension,
                                                     bibdoc_video_subformat)
                if getval(job, 'bibdoc_comment'):
                    bibdoc_video.set_comment(getval(job, 'bibdoc_comment'),
                                              bibdoc_video_format)
                if getval(job, 'bibdoc_description'):
                    bibdoc_video.set_description(getval(job, 'bibdoc_description'),
                                                 bibdoc_video_format)

        #------------#
        # EXTRACTION #
        #------------#

        # if there are multiple extraction jobs, all the produced files
        # with the same name will be in the same bibdoc! Make sure that
        # you use different subformats or docname templates to avoid
        # conflicts.

        if job['mode'] == 'extract':
            if getval(job, 'profile'):
                profile = get_extract_profile(job['profile'])
            else:
                profile = {}
            bibdoc_frame_subformat = getval(job, 'bibdoc_subformat')
            _task_write_message("Extracting frames to temporary directory")
            tmpdir = invenio.config.CFG_TMPDIR + "/" + str(uuid.uuid4())
            os.mkdir(tmpdir)
            #Move this to the batch description
            bibdoc_frame_docname = getval(job, 'bibdoc_docname', bibdoc_video_docname)
            tmpfname = (tmpdir + "/" + bibdoc_frame_docname + '.'
                        + getval(profile, 'extension',
                        getval(job, 'extension', 'jpg')))
            extraction_result = extract_frames(input_file=batch_job['input'],
                           output_file=tmpfname,
                           size=getval(job, 'size'),
                           positions=getval(job, 'positions'),
                           numberof=getval(job, 'numberof'),
                           width=getval(job, 'width'),
                           height=getval(job, 'height'),
                           aspect=getval(batch_job, 'aspect'),
                           profile=getval(job, 'profile'),
                           update_fnc=_task_update_overall_status,
                           )
            return_code &= extraction_result

            ## only on success:
            if extraction_result:
                ## for every filename in the directorys, create a bibdoc that contains
                ## all sizes of the frame from the two directories
                files = os.listdir(tmpdir)
                for filename in files:
                    ## The docname was altered by BibEncode extract through substitution
                    ## Retrieve it from the filename again
                    bibdoc_frame_docname, bibdoc_frame_extension = os.path.splitext(filename)
                    _task_write_message("Creating new bibdoc for %s" % bibdoc_frame_docname)
                    ## If the bibdoc exists, receive it
                    if bibdoc_frame_docname in recdoc.get_bibdoc_names():
                        bibdoc_frame = recdoc.get_bibdoc(bibdoc_frame_docname)
                    ## Create a new bibdoc if it does not exist
                    else:
                        bibdoc_frame = recdoc.add_bibdoc(docname=bibdoc_frame_docname)

                    ## The filename including path from tmpdir
                    fname = os.path.join(tmpdir, filename)

                    bibdoc_frame_format = compose_format(bibdoc_frame_extension, bibdoc_frame_subformat)
                    ## Same as with the master, if the format allready exists,
                    ## override it, because something went wrong before
                    if bibdoc_frame.format_already_exists_p(bibdoc_frame_format):
                        bibdoc_frame.delete_file(bibdoc_frame_format, 1)
                    _task_write_message("Adding %s jpg;%s to BibDoc"
                                  % (bibdoc_frame_docname,
                                     getval(job, 'bibdoc_subformat')))
                    bibdoc_frame.add_file_new_format(
                                    fname,
                                    version=1,
                                    description=getval(job, 'bibdoc_description'),
                                    comment=getval(job, 'bibdoc_comment'),
                                    docformat=bibdoc_frame_format)
            ## Remove the temporary folders
            _task_write_message("Removing temporary directory")
            shutil.rmtree(tmpdir)

        _BATCH_STEP = _BATCH_STEP + 1

    #-----------------#
    # FIX BIBDOC/MARC #
    #-----------------#

    _task_write_message("----------- Handling MARCXML -----------")

    ## Fix the BibDoc for all the videos previously created
    _task_write_message("Updating BibDoc of %s" % bibdoc_video_docname)
    bibdoc_video._build_file_list()

    ## Fix the MARC
    _task_write_message("Fixing MARC")
    cli_fix_marc({}, [batch_job['recid']], False)

    if getval(batch_job, 'collection'):
        ## Make the record visible by moving in from the collection
        marcxml = ("<record><controlfield tag=\"001\">%d</controlfield>"
                   "<datafield tag=\"980\" ind1=\" \" ind2=\" \">"
                   "<subfield code=\"a\">%s</subfield></datafield></record>"
                   ) % (batch_job['recid'], batch_job['collection'])
        upload_marcxml_file(marcxml)

    #---------------------#
    # ADD MASTER METADATA #
    #---------------------#

    if getval(batch_job, 'add_master_metadata'):
        _task_write_message("Adding master metadata")
        pbcore = pbcore_metadata(input_file = getval(batch_job, 'input'),
                                 pbcoreIdentifier = batch_job['recid'],
                                 aspect_override = getval(batch_job, 'aspect'))
        from invenio_formatter.engines.xslt import format
        marcxml = format(pbcore, CFG_BIBENCODE_PBCORE_MARC_XSLT)
        upload_marcxml_file(marcxml)

    #------------------#
    # ADD MARC SNIPPET #
    #------------------#

    if getval(batch_job, 'marc_snippet'):
        marc_snippet = open(getval(batch_job, 'marc_snippet'))
        marcxml = marc_snippet.read()
        marc_snippet.close()
        upload_marcxml_file(marcxml)

    #--------------#
    # DELETE INPUT #
    #--------------#

    if getval(batch_job, 'delete_input'):
        _task_write_message("Deleting input file")
        # only if successfull
        if not return_code:
            # only if input matches pattern
            if getval(batch_job, 'delete_input_pattern', '') in getval(batch_job, 'input'):
                try:
                    os.remove(getval(batch_job, 'input'))
                except OSError:
                    pass

    #--------------#
    # NOTIFICATION #
    #--------------#

    ## Send Notification emails on errors
    if not return_code:
        if getval(batch_job, 'notify_user'):
            _notify_error_user(getval(batch_job, 'notify_user'),
                               getval(batch_job, 'submission_filename', batch_job['input']),
                               getval(batch_job, 'recid'),
                               getval(batch_job, 'submission_title', ""))
            _task_write_message("Notify user because of an error")
        if getval(batch_job, 'notify_admin'):
            _task_write_message("Notify admin because of an error")
            if type(getval(batch_job, 'notify_admin') == type(str()) ):
                _notify_error_admin(batch_job,
                                    getval(batch_job, 'notify_admin'))

            else:
                _notify_error_admin(batch_job)
    else:
        if getval(batch_job, 'notify_user'):
            _task_write_message("Notify user because of success")
            _notify_success_user(getval(batch_job, 'notify_user'),
                               getval(batch_job, 'submission_filename', batch_job['input']),
                               getval(batch_job, 'recid'),
                               getval(batch_job, 'submission_title', ""))
    return 1

Example #22

0

Show file

File: batch_engine.py Project: CharlotteIrisC/invenio

def process_batch_job(batch_job_file):
    """ Processes a batch job description dictionary

    @param batch_job_file: a fullpath to a batch job file
    @type batch_job_file: string
    @return: 1 if the process was successful, 0 if not
    @rtype; int
    """
    from invenio.legacy.bibdocfile.cli import cli_fix_marc

    def upload_marcxml_file(marcxml):
        """ Creates a temporary marcxml file and sends it to bibupload
        """
        xml_filename = 'bibencode_'+ str(batch_job['recid']) + '_' + str(uuid.uuid4()) + '.xml'
        xml_filename = os.path.join(invenio.config.CFG_TMPSHAREDDIR, xml_filename)
        xml_file = file(xml_filename, 'w')
        xml_file.write(marcxml)
        xml_file.close()
        targs = ['-c', xml_filename]
        task_low_level_submission('bibupload', 'bibencode', *targs)

    #---------#
    # GENERAL #
    #---------#

    _task_write_message("----------- Handling Master -----------")

    ## Check the validity of the batch file here
    batch_job = json_decode_file(batch_job_file)

    ## Sanitise batch description and raise errrors
    batch_job = sanitise_batch_job(batch_job)

    ## Check if the record exists
    # if record_exists(batch_job['recid']) < 1:
    #     raise Exception("Record not found")

    recdoc = BibRecDocs(batch_job['recid'])

    #--------------------#
    # UPDATE FROM MASTER #
    #--------------------#

    ## We want to add new stuff to the video's record, using the master as input
    if getval(batch_job, 'update_from_master'):
        found_master = False
        bibdocs = recdoc.list_bibdocs()
        for bibdoc in bibdocs:
            bibdocfiles = bibdoc.list_all_files()
            for bibdocfile in bibdocfiles:
                comment = bibdocfile.get_comment()
                description = bibdocfile.get_description()
                subformat = bibdocfile.get_subformat()
                m_comment = getval(batch_job, 'bibdoc_master_comment', comment)
                m_description = getval(batch_job, 'bibdoc_master_description', description)
                m_subformat = getval(batch_job, 'bibdoc_master_subformat', subformat)
                if (comment == m_comment and
                    description == m_description and
                    subformat == m_subformat):
                    found_master = True
                    batch_job['input'] = bibdocfile.get_full_path()
                    ## Get the aspect of the from the record
                    try:
                        ## Assumes pbcore metadata mapping
                        batch_job['aspect'] = get_fieldvalues(124, CFG_BIBENCODE_ASPECT_RATIO_MARC_FIELD)[0]
                    except IndexError:
                        pass
                    break
            if found_master:
                break
        if not found_master:
            _task_write_message("Video master for record %d not found"
                          % batch_job['recid'])
            task_update_progress("Video master for record %d not found"
                                 % batch_job['recid'])
            ## Maybe send an email?
            return 1

    ## Clean the job to do no upscaling etc
    if getval(batch_job, 'assure_quality'):
        batch_job = clean_job_for_quality(batch_job)

    global _BATCH_STEPS
    _BATCH_STEPS = len(batch_job['jobs'])

    ## Generate the docname from the input filename's name or given name
    bibdoc_video_docname, bibdoc_video_extension = decompose_file(batch_job['input'])[1:]
    if not bibdoc_video_extension or getval(batch_job, 'bibdoc_master_extension'):
        bibdoc_video_extension = getval(batch_job, 'bibdoc_master_extension')
    if getval(batch_job, 'bibdoc_master_docname'):
        bibdoc_video_docname = getval(batch_job, 'bibdoc_master_docname')

    write_message("Creating BibDoc for %s" % bibdoc_video_docname)
    ## If the bibdoc exists, receive it
    if bibdoc_video_docname in recdoc.get_bibdoc_names():
        bibdoc_video = recdoc.get_bibdoc(bibdoc_video_docname)
    ## Create a new bibdoc if it does not exist
    else:
        bibdoc_video = recdoc.add_bibdoc(docname=bibdoc_video_docname)

    ## Get the directory auf the newly created bibdoc to copy stuff there
    bibdoc_video_directory = bibdoc_video.get_base_dir()

    #--------#
    # MASTER #
    #--------#
    if not getval(batch_job, 'update_from_master'):
        if getval(batch_job, 'add_master'):
            ## Generate the right name for the master
            ## The master should be hidden first an then renamed
            ## when it is really available
            ## !!! FIX !!!
            _task_write_message("Adding %s master to the BibDoc"
                          % bibdoc_video_docname)
            master_format = compose_format(
                                    bibdoc_video_extension,
                                    getval(batch_job, 'bibdoc_master_subformat', 'master')
                                    )
            ## If a file of the same format is there, something is wrong, remove it!
            ## it might be caused by a previous corrupted submission etc.
            if bibdoc_video.format_already_exists_p(master_format):
                bibdoc_video.delete_file(master_format, 1)
            bibdoc_video.add_file_new_format(
                    batch_job['input'],
                    version=1,
                    description=getval(batch_job, 'bibdoc_master_description'),
                    comment=getval(batch_job, 'bibdoc_master_comment'),
                    docformat=master_format
                    )

    #-----------#
    # JOBS LOOP #
    #-----------#

    return_code = 1
    global _BATCH_STEP

    for job in batch_job['jobs']:

        _task_write_message("----------- Job %s of %s -----------"
                           % (_BATCH_STEP, _BATCH_STEPS))

        ## Try to substitute docname with master docname
        if getval(job, 'bibdoc_docname'):
            job['bibdoc_docname'] = Template(job['bibdoc_docname']).safe_substitute({'bibdoc_master_docname': bibdoc_video_docname})

        #-------------#
        # TRANSCODING #
        #-------------#

        if job['mode'] == 'encode':

            ## Skip the job if assure_quality is not set and marked as fallback
            if not getval(batch_job, 'assure_quality') and getval(job, 'fallback'):
                continue

            if getval(job, 'profile'):
                profile = get_encoding_profile(job['profile'])
            else:
                profile = None
            ## We need an extension defined fot the video container
            bibdoc_video_extension = getval(job, 'extension',
                                            getval(profile, 'extension'))
            if not bibdoc_video_extension:
                raise Exception("No container/extension defined")
            ## Get the docname and subformat
            bibdoc_video_subformat = getval(job, 'bibdoc_subformat')
            bibdoc_slave_video_docname = getval(job, 'bibdoc_docname', bibdoc_video_docname)
            ## The subformat is incompatible with ffmpegs name convention
            ## We do the encoding without and rename it afterwards
            bibdoc_video_fullpath = compose_file(
                                                 bibdoc_video_directory,
                                                 bibdoc_video_extension
                                                 )
            _task_write_message("Transcoding %s to %s;%s" % (bibdoc_slave_video_docname,
                                bibdoc_video_extension,
                                bibdoc_video_subformat))
            ## We encode now directly into the bibdocs directory
            encoding_result = encode_video(
                 input_file=batch_job['input'],
                 output_file=bibdoc_video_fullpath,
                 acodec=getval(job, 'audiocodec'),
                 vcodec=getval(job, 'videocodec'),
                 abitrate=getval(job, 'videobitrate'),
                 vbitrate=getval(job, 'audiobitrate'),
                 resolution=getval(job, 'resolution'),
                 passes=getval(job, 'passes', 1),
                 special=getval(job, 'special'),
                 specialfirst=getval(job, 'specialfirst'),
                 specialsecond=getval(job, 'specialsecond'),
                 metadata=getval(job, 'metadata'),
                 width=getval(job, 'width'),
                 height=getval(job, 'height'),
                 aspect=getval(batch_job, 'aspect'), # Aspect for every job
                 profile=getval(job, 'profile'),
                 update_fnc=_task_update_overall_status,
                 message_fnc=_task_write_message
                 )
            return_code &= encoding_result
            ## only on success
            if  encoding_result:
                ## Rename it, adding the subformat
                os.rename(bibdoc_video_fullpath,
                          compose_file(bibdoc_video_directory,
                                       bibdoc_video_extension,
                                       bibdoc_video_subformat,
                                       1,
                                       bibdoc_slave_video_docname)
                          )
                #bibdoc_video._build_file_list()
                bibdoc_video.touch()
                bibdoc_video._sync_to_db()
                bibdoc_video_format = compose_format(bibdoc_video_extension,
                                                     bibdoc_video_subformat)
                if getval(job, 'bibdoc_comment'):
                    bibdoc_video.set_comment(getval(job, 'bibdoc_comment'),
                                              bibdoc_video_format)
                if getval(job, 'bibdoc_description'):
                    bibdoc_video.set_description(getval(job, 'bibdoc_description'),
                                                 bibdoc_video_format)

        #------------#
        # EXTRACTION #
        #------------#

        # if there are multiple extraction jobs, all the produced files
        # with the same name will be in the same bibdoc! Make sure that
        # you use different subformats or docname templates to avoid
        # conflicts.

        if job['mode'] == 'extract':
            if getval(job, 'profile'):
                profile = get_extract_profile(job['profile'])
            else:
                profile = {}
            bibdoc_frame_subformat = getval(job, 'bibdoc_subformat')
            _task_write_message("Extracting frames to temporary directory")
            tmpdir = invenio.config.CFG_TMPDIR + "/" + str(uuid.uuid4())
            os.mkdir(tmpdir)
            #Move this to the batch description
            bibdoc_frame_docname = getval(job, 'bibdoc_docname', bibdoc_video_docname)
            tmpfname = (tmpdir + "/" + bibdoc_frame_docname + '.'
                        + getval(profile, 'extension',
                        getval(job, 'extension', 'jpg')))
            extraction_result = extract_frames(input_file=batch_job['input'],
                           output_file=tmpfname,
                           size=getval(job, 'size'),
                           positions=getval(job, 'positions'),
                           numberof=getval(job, 'numberof'),
                           width=getval(job, 'width'),
                           height=getval(job, 'height'),
                           aspect=getval(batch_job, 'aspect'),
                           profile=getval(job, 'profile'),
                           update_fnc=_task_update_overall_status,
                           )
            return_code &= extraction_result

            ## only on success:
            if extraction_result:
                ## for every filename in the directorys, create a bibdoc that contains
                ## all sizes of the frame from the two directories
                files = os.listdir(tmpdir)
                for filename in files:
                    ## The docname was altered by BibEncode extract through substitution
                    ## Retrieve it from the filename again
                    bibdoc_frame_docname, bibdoc_frame_extension = os.path.splitext(filename)
                    _task_write_message("Creating new bibdoc for %s" % bibdoc_frame_docname)
                    ## If the bibdoc exists, receive it
                    if bibdoc_frame_docname in recdoc.get_bibdoc_names():
                        bibdoc_frame = recdoc.get_bibdoc(bibdoc_frame_docname)
                    ## Create a new bibdoc if it does not exist
                    else:
                        bibdoc_frame = recdoc.add_bibdoc(docname=bibdoc_frame_docname)

                    ## The filename including path from tmpdir
                    fname = os.path.join(tmpdir, filename)

                    bibdoc_frame_format = compose_format(bibdoc_frame_extension, bibdoc_frame_subformat)
                    ## Same as with the master, if the format allready exists,
                    ## override it, because something went wrong before
                    if bibdoc_frame.format_already_exists_p(bibdoc_frame_format):
                        bibdoc_frame.delete_file(bibdoc_frame_format, 1)
                    _task_write_message("Adding %s jpg;%s to BibDoc"
                                  % (bibdoc_frame_docname,
                                     getval(job, 'bibdoc_subformat')))
                    bibdoc_frame.add_file_new_format(
                                    fname,
                                    version=1,
                                    description=getval(job, 'bibdoc_description'),
                                    comment=getval(job, 'bibdoc_comment'),
                                    docformat=bibdoc_frame_format)
            ## Remove the temporary folders
            _task_write_message("Removing temporary directory")
            shutil.rmtree(tmpdir)

        _BATCH_STEP = _BATCH_STEP + 1

    #-----------------#
    # FIX BIBDOC/MARC #
    #-----------------#

    _task_write_message("----------- Handling MARCXML -----------")

    ## Fix the BibDoc for all the videos previously created
    _task_write_message("Updating BibDoc of %s" % bibdoc_video_docname)
    bibdoc_video._build_file_list()

    ## Fix the MARC
    _task_write_message("Fixing MARC")
    cli_fix_marc({}, [batch_job['recid']], False)

    if getval(batch_job, 'collection'):
        ## Make the record visible by moving in from the collection
        marcxml = ("<record><controlfield tag=\"001\">%d</controlfield>"
                   "<datafield tag=\"980\" ind1=\" \" ind2=\" \">"
                   "<subfield code=\"a\">%s</subfield></datafield></record>"
                   ) % (batch_job['recid'], batch_job['collection'])
        upload_marcxml_file(marcxml)

    #---------------------#
    # ADD MASTER METADATA #
    #---------------------#

    if getval(batch_job, 'add_master_metadata'):
        _task_write_message("Adding master metadata")
        pbcore = pbcore_metadata(input_file = getval(batch_job, 'input'),
                                 pbcoreIdentifier = batch_job['recid'],
                                 aspect_override = getval(batch_job, 'aspect'))
        from invenio.modules.formatter.engines.xslt import format
        marcxml = format(pbcore, CFG_BIBENCODE_PBCORE_MARC_XSLT)
        upload_marcxml_file(marcxml)

    #------------------#
    # ADD MARC SNIPPET #
    #------------------#

    if getval(batch_job, 'marc_snippet'):
        marc_snippet = open(getval(batch_job, 'marc_snippet'))
        marcxml = marc_snippet.read()
        marc_snippet.close()
        upload_marcxml_file(marcxml)

    #--------------#
    # DELETE INPUT #
    #--------------#

    if getval(batch_job, 'delete_input'):
        _task_write_message("Deleting input file")
        # only if successfull
        if not return_code:
            # only if input matches pattern
            if getval(batch_job, 'delete_input_pattern', '') in getval(batch_job, 'input'):
                try:
                    os.remove(getval(batch_job, 'input'))
                except OSError:
                    pass

    #--------------#
    # NOTIFICATION #
    #--------------#

    ## Send Notification emails on errors
    if not return_code:
        if getval(batch_job, 'notify_user'):
            _notify_error_user(getval(batch_job, 'notify_user'),
                               getval(batch_job, 'submission_filename', batch_job['input']),
                               getval(batch_job, 'recid'),
                               getval(batch_job, 'submission_title', ""))
            _task_write_message("Notify user because of an error")
        if getval(batch_job, 'notify_admin'):
            _task_write_message("Notify admin because of an error")
            if type(getval(batch_job, 'notify_admin') == type(str()) ):
                _notify_error_admin(batch_job,
                                    getval(batch_job, 'notify_admin'))

            else:
                _notify_error_admin(batch_job)
    else:
        if getval(batch_job, 'notify_user'):
            _task_write_message("Notify user because of success")
            _notify_success_user(getval(batch_job, 'notify_user'),
                               getval(batch_job, 'submission_filename', batch_job['input']),
                               getval(batch_job, 'recid'),
                               getval(batch_job, 'submission_title', ""))
    return 1

Example #23

0

Show file

File: file_metadata.py Project: chokribr/invenio-1

def read_metadata(inputfile,
                  force=None,
                  remote=False,
                  loginpw=None,
                  verbose=0):
    """Return metadata extracted from given file as dictionary.

    Availability depends on input file format and installed plugins
    (return C{TypeError} if unsupported file format).

    @param inputfile: path to a file
    @type inputfile: string
    @param verbose: verbosity
    @type verbose: int
    @param force: name of plugin to use, to skip plugin auto-discovery
    @type force: string
    @param remote: if the file is accessed remotely or not
    @type remote: boolean
    @param loginpw: credentials to access secure servers (username:password)
    @type loginpw: string
    @return: dictionary of metadata tags as keys, and (interpreted)
             value as value
    @rtype: dict
    @raise TypeError: if file format is not supported.
    @raise RuntimeError: if required library to process file is missing.
    @raise InvenioWebSubmitFileMetadataRuntimeError:
    when metadata cannot be read.
    """
    metadata = None
    # Check file type (0 base, 1 name, 2 ext)
    ext = decompose_file(inputfile)[2]
    if verbose > 5:
        print(ext.lower(), 'extension to extract from')

    # Loop through the plugins to find a good one for given file
    for plugin_name, plugin in iteritems(metadata_extractor_plugins):
        # Local file
        if 'can_read_local' in plugin and \
            plugin['can_read_local'](inputfile) and not remote and \
                (not force or plugin_name == force):
            if verbose > 5:
                print('Using ' + plugin_name)
            fetched_metadata = plugin['read_metadata_local'](inputfile,
                                                             verbose)
            if not metadata:
                metadata = fetched_metadata
            else:
                metadata.update(fetched_metadata)

        # Remote file
        elif remote and 'can_read_remote' in plugin and \
            plugin['can_read_remote'](inputfile) and \
                (not force or plugin_name == force):
            if verbose > 5:
                print('Using ' + plugin_name)
            fetched_metadata = plugin['read_metadata_remote'](inputfile,
                                                              loginpw, verbose)
            if not metadata:
                metadata = fetched_metadata
            else:
                metadata.update(fetched_metadata)

    # Return in case we have something
    if metadata is not None:
        return metadata

    # Case of no plugin found, raise
    raise TypeError('Unsupported file type')

Example #24

0

Show file

File: webinterface.py Project: chokribr/invenio-1

    def upload_video(self, req, form):
        """
        A clone of uploadfile but for (large) videos.
        Does not copy the uploaded file to the websubmit directory.
        Instead, the path to the file is stored inside the submission directory.
        """
        def gcd(a, b):
            """ the euclidean algorithm """
            while a:
                a, b = b % a, a
            return b

        from invenio.modules.encoder.extract import extract_frames
        from invenio.modules.encoder.config import CFG_BIBENCODE_WEBSUBMIT_ASPECT_SAMPLE_DIR, CFG_BIBENCODE_WEBSUBMIT_ASPECT_SAMPLE_FNAME
        from invenio.modules.encoder.encode import determine_aspect
        from invenio.modules.encoder.utils import probe
        from invenio.modules.encoder.metadata import ffprobe_metadata
        from invenio.legacy.websubmit.config import CFG_WEBSUBMIT_TMP_VIDEO_PREFIX

        argd = wash_urlargd(
            form, {
                'doctype': (str, ''),
                'access': (str, ''),
                'indir': (str, ''),
                'session_id': (str, ''),
                'rename': (str, ''),
            })

        curdir = None
        if "indir" not in form or \
               "doctype" not in form or \
               "access" not in form:
            raise apache.SERVER_RETURN(apache.HTTP_BAD_REQUEST)
        else:
            curdir = os.path.join(CFG_WEBSUBMIT_STORAGEDIR, argd['indir'],
                                  argd['doctype'], argd['access'])

        user_info = collect_user_info(req)
        if "session_id" in form:
            # Are we uploading using Flash, which does not transmit
            # cookie? The expect to receive session_id as a form
            # parameter.  First check that IP addresses do not
            # mismatch.

            uid = session.uid
            user_info = collect_user_info(uid)
            try:
                act_fd = file(os.path.join(curdir, 'act'))
                action = act_fd.read()
                act_fd.close()
            except:
                act = ""

        # Is user authorized to perform this action?
        (auth_code, auth_message) = acc_authorize_action(
            uid,
            "submit",
            authorized_if_no_roles=not isGuestUser(uid),
            verbose=0,
            doctype=argd['doctype'],
            act=action)
        if acc_is_role("submit", doctype=argd['doctype'],
                       act=action) and auth_code != 0:
            # User cannot submit
            raise apache.SERVER_RETURN(apache.HTTP_UNAUTHORIZED)
        else:
            # Process the upload and get the response
            json_response = {}
            for key, formfields in form.items():
                filename = key.replace("[]", "")
                if hasattr(formfields, "filename") and formfields.filename:
                    dir_to_open = os.path.abspath(
                        os.path.join(curdir, 'files', str(user_info['uid']),
                                     key))
                    try:
                        assert (
                            dir_to_open.startswith(CFG_WEBSUBMIT_STORAGEDIR))
                    except AssertionError:
                        register_exception(req=req,
                                           prefix='curdir="%s", key="%s"' %
                                           (curdir, key))
                        raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN)

                    if not os.path.exists(dir_to_open):
                        try:
                            os.makedirs(dir_to_open)
                        except OSError as e:
                            if e.errno != errno.EEXIST:
                                # If the issue is only that directory
                                # already exists, then continue, else
                                # report
                                register_exception(req=req, alert_admin=True)
                                raise apache.SERVER_RETURN(
                                    apache.HTTP_FORBIDDEN)

                    filename = formfields.filename
                    ## Before saving the file to disc, wash the filename (in particular
                    ## washing away UNIX and Windows (e.g. DFS) paths):
                    filename = os.path.basename(filename.split('\\')[-1])
                    filename = filename.strip()
                    if filename != "":
                        # Check that file does not already exist
                        while os.path.exists(
                                os.path.join(dir_to_open, filename)):
                            #dirname, basename, extension = decompose_file(new_destination_path)
                            basedir, name, extension = decompose_file(filename)
                            new_name = propose_next_docname(name)
                            filename = new_name + extension

                        #-------------#
                        # VIDEO STUFF #
                        #-------------#

                        ## Remove all previous uploads
                        filelist = os.listdir(
                            os.path.split(formfields.file.name)[0])
                        for afile in filelist:
                            if argd['access'] in afile:
                                os.remove(
                                    os.path.join(
                                        os.path.split(formfields.file.name)[0],
                                        afile))

                        ## Check if the file is a readable video
                        ## We must exclude all image and audio formats that are readable by ffprobe
                        if (os.path.splitext(filename)[1] in [
                                'jpg', 'jpeg', 'gif', 'tiff', 'bmp', 'png',
                                'tga', 'jp2', 'j2k', 'jpf', 'jpm', 'mj2',
                                'biff', 'cgm', 'exif', 'img', 'mng', 'pic',
                                'pict', 'raw', 'wmf', 'jpe', 'jif', 'jfif',
                                'jfi', 'tif', 'webp', 'svg', 'ai', 'ps', 'psd',
                                'wav', 'mp3', 'pcm', 'aiff', 'au', 'flac',
                                'wma', 'm4a', 'wv', 'oga', 'm4a', 'm4b', 'm4p',
                                'm4r', 'aac', 'mp4', 'vox', 'amr', 'snd'
                        ] or not probe(formfields.file.name)):
                            formfields.file.close()
                            raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN)

                        ## We have no "delete" attribute in Python 2.4
                        if sys.hexversion < 0x2050000:
                            ## We need to rename first and create a dummy file
                            ## Rename the temporary file for the garbage collector
                            new_tmp_fullpath = os.path.split(
                                formfields.file.name
                            )[0] + "/" + CFG_WEBSUBMIT_TMP_VIDEO_PREFIX + argd[
                                'access'] + "_" + os.path.split(
                                    formfields.file.name)[1]
                            os.rename(formfields.file.name, new_tmp_fullpath)
                            dummy = open(formfields.file.name, "w")
                            dummy.close()
                            formfields.file.close()
                        else:
                            # Mark the NamedTemporatyFile as not to be deleted
                            formfields.file.delete = False
                            formfields.file.close()
                            ## Rename the temporary file for the garbage collector
                            new_tmp_fullpath = os.path.split(
                                formfields.file.name
                            )[0] + "/" + CFG_WEBSUBMIT_TMP_VIDEO_PREFIX + argd[
                                'access'] + "_" + os.path.split(
                                    formfields.file.name)[1]
                            os.rename(formfields.file.name, new_tmp_fullpath)

                        # Write the path to the temp file to a file in STORAGEDIR
                        fp = open(os.path.join(dir_to_open, "filepath"), "w")
                        fp.write(new_tmp_fullpath)
                        fp.close()

                        fp = open(os.path.join(dir_to_open, "filename"), "w")
                        fp.write(filename)
                        fp.close()

                        ## We are going to extract some thumbnails for websubmit ##
                        sample_dir = os.path.join(
                            curdir, 'files', str(user_info['uid']),
                            CFG_BIBENCODE_WEBSUBMIT_ASPECT_SAMPLE_DIR)
                        try:
                            ## Remove old thumbnails
                            shutil.rmtree(sample_dir)
                        except OSError:
                            register_exception(req=req, alert_admin=False)
                        try:
                            os.makedirs(
                                os.path.join(curdir, 'files',
                                             str(user_info['uid']),
                                             sample_dir))
                        except OSError:
                            register_exception(req=req, alert_admin=False)
                        try:
                            extract_frames(
                                input_file=new_tmp_fullpath,
                                output_file=os.path.join(
                                    sample_dir,
                                    CFG_BIBENCODE_WEBSUBMIT_ASPECT_SAMPLE_FNAME
                                ),
                                size="600x600",
                                numberof=5)
                            json_response['frames'] = []
                            for extracted_frame in os.listdir(sample_dir):
                                json_response['frames'].append(extracted_frame)
                        except:
                            ## If the frame extraction fails, something was bad with the video
                            os.remove(new_tmp_fullpath)
                            register_exception(req=req, alert_admin=False)
                            raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN)

                        ## Try to detect the aspect. if this fails, the video is not readable
                        ## or a wrong file might have been uploaded
                        try:
                            (aspect, width,
                             height) = determine_aspect(new_tmp_fullpath)
                            if aspect:
                                aspx, aspy = aspect.split(':')
                            else:
                                the_gcd = gcd(width, height)
                                aspx = str(width / the_gcd)
                                aspy = str(height / the_gcd)
                            json_response['aspx'] = aspx
                            json_response['aspy'] = aspy
                        except TypeError:
                            ## If the aspect detection completely fails
                            os.remove(new_tmp_fullpath)
                            register_exception(req=req, alert_admin=False)
                            raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN)

                        ## Try to extract some metadata from the video container
                        metadata = ffprobe_metadata(new_tmp_fullpath)
                        json_response['meta_title'] = metadata['format'].get(
                            'TAG:title')
                        json_response['meta_description'] = metadata[
                            'format'].get('TAG:description')
                        json_response['meta_year'] = metadata['format'].get(
                            'TAG:year')
                        json_response['meta_author'] = metadata['format'].get(
                            'TAG:author')
                    ## Empty file name
                    else:
                        raise apache.SERVER_RETURN(apache.HTTP_BAD_REQUEST)
                    ## We found our file, we can break the loop
                    break

            # Send our response
            if CFG_JSON_AVAILABLE:

                dumped_response = json.dumps(json_response)

                # store the response in the websubmit directory
                # this is needed if the submission is not finished and continued later
                response_dir = os.path.join(curdir, 'files',
                                            str(user_info['uid']), "response")
                try:
                    os.makedirs(response_dir)
                except OSError:
                    # register_exception(req=req, alert_admin=False)
                    pass
                fp = open(os.path.join(response_dir, "response"), "w")
                fp.write(dumped_response)
                fp.close()

                return dumped_response

Example #25

0

Show file

File: webinterface.py Project: mhellmic/b2share

    def upload_video(self, req, form):
        """
        A clone of uploadfile but for (large) videos.
        Does not copy the uploaded file to the websubmit directory.
        Instead, the path to the file is stored inside the submission directory.
        """

        def gcd(a, b):
            """ the euclidean algorithm """
            while a:
                a, b = b % a, a
            return b

        from invenio.modules.encoder.extract import extract_frames
        from invenio.modules.encoder.config import CFG_BIBENCODE_WEBSUBMIT_ASPECT_SAMPLE_DIR, CFG_BIBENCODE_WEBSUBMIT_ASPECT_SAMPLE_FNAME
        from invenio.modules.encoder.encode import determine_aspect
        from invenio.modules.encoder.utils import probe
        from invenio.modules.encoder.metadata import ffprobe_metadata
        from invenio.legacy.websubmit.config import CFG_WEBSUBMIT_TMP_VIDEO_PREFIX

        argd = wash_urlargd(form, {
            'doctype': (str, ''),
            'access': (str, ''),
            'indir': (str, ''),
            'session_id': (str, ''),
            'rename': (str, ''),
            })

        curdir = None
        if "indir" not in form or \
               "doctype" not in form or \
               "access" not in form:
            raise apache.SERVER_RETURN(apache.HTTP_BAD_REQUEST)
        else:
            curdir = os.path.join(CFG_WEBSUBMIT_STORAGEDIR,
                                  argd['indir'],
                                  argd['doctype'],
                                  argd['access'])

        user_info = collect_user_info(req)
        if "session_id" in form:
            # Are we uploading using Flash, which does not transmit
            # cookie? The expect to receive session_id as a form
            # parameter.  First check that IP addresses do not
            # mismatch.

            uid = session.uid
            user_info = collect_user_info(uid)
            try:
                act_fd = file(os.path.join(curdir, 'act'))
                action = act_fd.read()
                act_fd.close()
            except:
                act = ""

        # Is user authorized to perform this action?
        (auth_code, auth_message) = acc_authorize_action(uid, "submit",
                                                         authorized_if_no_roles=not isGuestUser(uid),
                                                         verbose=0,
                                                         doctype=argd['doctype'],
                                                         act=action)
        if acc_is_role("submit", doctype=argd['doctype'], act=action) and auth_code != 0:
            # User cannot submit
            raise apache.SERVER_RETURN(apache.HTTP_UNAUTHORIZED)
        else:
            # Process the upload and get the response
            json_response = {}
            for key, formfields in form.items():
                filename = key.replace("[]", "")
                if hasattr(formfields, "filename") and formfields.filename:
                    dir_to_open = os.path.abspath(os.path.join(curdir,
                                                               'files',
                                                               str(user_info['uid']),
                                                               key))
                    try:
                        assert(dir_to_open.startswith(CFG_WEBSUBMIT_STORAGEDIR))
                    except AssertionError:
                        register_exception(req=req, prefix='curdir="%s", key="%s"' % (curdir, key))
                        raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN)

                    if not os.path.exists(dir_to_open):
                        try:
                            os.makedirs(dir_to_open)
                        except OSError as e:
                            if e.errno != errno.EEXIST:
                                # If the issue is only that directory
                                # already exists, then continue, else
                                # report
                                register_exception(req=req, alert_admin=True)
                                raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN)

                    filename = formfields.filename
                    ## Before saving the file to disc, wash the filename (in particular
                    ## washing away UNIX and Windows (e.g. DFS) paths):
                    filename = os.path.basename(filename.split('\\')[-1])
                    filename = filename.strip()
                    if filename != "":
                        # Check that file does not already exist
                        while os.path.exists(os.path.join(dir_to_open, filename)):
                            #dirname, basename, extension = decompose_file(new_destination_path)
                            basedir, name, extension = decompose_file(filename)
                            new_name = propose_next_docname(name)
                            filename = new_name + extension

                        #-------------#
                        # VIDEO STUFF #
                        #-------------#

                        ## Remove all previous uploads
                        filelist = os.listdir(os.path.split(formfields.file.name)[0])
                        for afile in filelist:
                            if argd['access'] in afile:
                                os.remove(os.path.join(os.path.split(formfields.file.name)[0], afile))

                        ## Check if the file is a readable video
                        ## We must exclude all image and audio formats that are readable by ffprobe
                        if (os.path.splitext(filename)[1] in ['jpg', 'jpeg', 'gif', 'tiff', 'bmp', 'png', 'tga',
                                                              'jp2', 'j2k', 'jpf', 'jpm', 'mj2', 'biff', 'cgm',
                                                              'exif', 'img', 'mng', 'pic', 'pict', 'raw', 'wmf', 'jpe', 'jif',
                                                              'jfif', 'jfi', 'tif', 'webp', 'svg', 'ai', 'ps', 'psd',
                                                              'wav', 'mp3', 'pcm', 'aiff', 'au', 'flac', 'wma', 'm4a', 'wv', 'oga',
                                                              'm4a', 'm4b', 'm4p', 'm4r', 'aac', 'mp4', 'vox', 'amr', 'snd']
                                                              or not probe(formfields.file.name)):
                            formfields.file.close()
                            raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN)

                        ## We have no "delete" attribute in Python 2.4
                        if sys.hexversion < 0x2050000:
                            ## We need to rename first and create a dummy file
                            ## Rename the temporary file for the garbage collector
                            new_tmp_fullpath = os.path.split(formfields.file.name)[0] + "/" + CFG_WEBSUBMIT_TMP_VIDEO_PREFIX + argd['access'] + "_" + os.path.split(formfields.file.name)[1]
                            os.rename(formfields.file.name, new_tmp_fullpath)
                            dummy = open(formfields.file.name, "w")
                            dummy.close()
                            formfields.file.close()
                        else:
                            # Mark the NamedTemporatyFile as not to be deleted
                            formfields.file.delete = False
                            formfields.file.close()
                            ## Rename the temporary file for the garbage collector
                            new_tmp_fullpath = os.path.split(formfields.file.name)[0] + "/" + CFG_WEBSUBMIT_TMP_VIDEO_PREFIX + argd['access'] + "_" + os.path.split(formfields.file.name)[1]
                            os.rename(formfields.file.name, new_tmp_fullpath)

                        # Write the path to the temp file to a file in STORAGEDIR
                        fp = open(os.path.join(dir_to_open, "filepath"), "w")
                        fp.write(new_tmp_fullpath)
                        fp.close()

                        fp = open(os.path.join(dir_to_open, "filename"), "w")
                        fp.write(filename)
                        fp.close()

                        ## We are going to extract some thumbnails for websubmit ##
                        sample_dir = os.path.join(curdir, 'files', str(user_info['uid']), CFG_BIBENCODE_WEBSUBMIT_ASPECT_SAMPLE_DIR)
                        try:
                            ## Remove old thumbnails
                            shutil.rmtree(sample_dir)
                        except OSError:
                            register_exception(req=req, alert_admin=False)
                        try:
                            os.makedirs(os.path.join(curdir, 'files', str(user_info['uid']), sample_dir))
                        except OSError:
                            register_exception(req=req, alert_admin=False)
                        try:
                            extract_frames(input_file=new_tmp_fullpath,
                                        output_file=os.path.join(sample_dir, CFG_BIBENCODE_WEBSUBMIT_ASPECT_SAMPLE_FNAME),
                                        size="600x600",
                                        numberof=5)
                            json_response['frames'] = []
                            for extracted_frame in os.listdir(sample_dir):
                                json_response['frames'].append(extracted_frame)
                        except:
                            ## If the frame extraction fails, something was bad with the video
                            os.remove(new_tmp_fullpath)
                            register_exception(req=req, alert_admin=False)
                            raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN)

                        ## Try to detect the aspect. if this fails, the video is not readable
                        ## or a wrong file might have been uploaded
                        try:
                            (aspect, width, height) = determine_aspect(new_tmp_fullpath)
                            if aspect:
                                aspx, aspy = aspect.split(':')
                            else:
                                the_gcd = gcd(width, height)
                                aspx = str(width / the_gcd)
                                aspy = str(height / the_gcd)
                            json_response['aspx'] = aspx
                            json_response['aspy'] = aspy
                        except TypeError:
                            ## If the aspect detection completely fails
                            os.remove(new_tmp_fullpath)
                            register_exception(req=req, alert_admin=False)
                            raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN)

                        ## Try to extract some metadata from the video container
                        metadata = ffprobe_metadata(new_tmp_fullpath)
                        json_response['meta_title'] = metadata['format'].get('TAG:title')
                        json_response['meta_description'] = metadata['format'].get('TAG:description')
                        json_response['meta_year'] = metadata['format'].get('TAG:year')
                        json_response['meta_author'] = metadata['format'].get('TAG:author')
                    ## Empty file name
                    else:
                        raise apache.SERVER_RETURN(apache.HTTP_BAD_REQUEST)
                    ## We found our file, we can break the loop
                    break;

            # Send our response
            if CFG_JSON_AVAILABLE:

                dumped_response = json.dumps(json_response)

                # store the response in the websubmit directory
                # this is needed if the submission is not finished and continued later
                response_dir = os.path.join(curdir, 'files', str(user_info['uid']), "response")
                try:
                    os.makedirs(response_dir)
                except OSError:
                    # register_exception(req=req, alert_admin=False)
                    pass
                fp = open(os.path.join(response_dir, "response"), "w")
                fp.write(dumped_response)
                fp.close()

                return dumped_response

Example #26

0

Show file

File: Move_Files_to_Storage.py Project: SCOAP3/invenio

def Move_Files_to_Storage(parameters, curdir, form, user_info=None):
    """
    The function moves files received from the standard submission's
    form through file input element(s). The document are assigned a
    'doctype' (or category) corresponding to the file input element
    (eg. a file uploaded throught 'DEMOPIC_FILE' will go to
    'DEMOPIC_FILE' doctype/category).

    Websubmit engine builds the following file organization in the
    directory curdir/files:

                  curdir/files
                        |
      _____________________________________________________________________
            |                                   |                          |
      ./file input 1 element's name      ./file input 2 element's name    ....
         (for eg. 'DEMOART_MAILFILE')       (for eg. 'DEMOART_APPENDIX')
         |                                     |
      test1.pdf                             test2.pdf


    There is only one instance of all possible extension(pdf, gz...) in each part
    otherwise we may encounter problems when renaming files.

    + parameters['rename']: if given, all the files in curdir/files
      are renamed.  parameters['rename'] is of the form:
      <PA>elemfilename[re]</PA>* where re is an regexp to select(using
      re.sub) what part of the elem file has to be selected.
      e.g: <PA>file:TEST_FILE_RN</PA>

    + parameters['documenttype']: if given, other formats are created.
      It has 2 possible values: - if "picture" icon in gif format is created
                                - if "fulltext" ps, gz .... formats are created

    + parameters['paths_and_suffixes']: directories to look into and
      corresponding suffix to add to every file inside. It must have
      the same structure as a Python dictionnary of the following form
      {'FrenchAbstract':'french', 'EnglishAbstract':''}

      The keys are the file input element name from the form <=>
      directories in curdir/files The values associated are the
      suffixes which will be added to all the files in
      e.g. curdir/files/FrenchAbstract

    + parameters['iconsize'] need only if 'icon' is selected in
      parameters['documenttype']

    + parameters['paths_and_restrictions']: the restrictions to apply
      to each uploaded file. The parameter must have the same
      structure as a Python dictionnary of the following form:
      {'DEMOART_APPENDIX':'restricted'}
      Files not specified in this parameter are not restricted.
      The specified restrictions can include a variable that can be
      replaced at runtime, for eg:
      {'DEMOART_APPENDIX':'restricted to <PA>file:SuE</PA>'}

    + parameters['paths_and_doctypes']: if a doctype is specified,
      the file will be saved under the 'doctype/collection' instead
      of under the default doctype/collection given by the name
      of the upload element that was used on the websubmit interface.
      to configure the doctype in websubmit, enter the value as in a
      dictionnary, for eg:
      {'PATHS_SWORD_UPL' : 'PUSHED_TO_ARXIV'} -> from
      Demo_Export_Via_Sword [DEMOSWR] Document Types
    """

    global sysno
    paths_and_suffixes = parameters['paths_and_suffixes']
    paths_and_restrictions = parameters['paths_and_restrictions']
    rename = parameters['rename']
    documenttype = parameters['documenttype']
    iconsizes = parameters['iconsize'].split(',')
    paths_and_doctypes = parameters['paths_and_doctypes']

    ## Create an instance of BibRecDocs for the current recid(sysno)
    bibrecdocs = BibRecDocs(sysno)

    paths_and_suffixes = get_dictionary_from_string(paths_and_suffixes)

    paths_and_restrictions = get_dictionary_from_string(paths_and_restrictions)

    paths_and_doctypes = get_dictionary_from_string(paths_and_doctypes)

    ## Go through all the directories specified in the keys
    ## of parameters['paths_and_suffixes']
    for path in paths_and_suffixes.keys():
        ## Check if there is a directory for the current path
        if os.path.exists("%s/files/%s" % (curdir, path)):
            ## Retrieve the restriction to apply to files in this
            ## directory
            restriction = paths_and_restrictions.get(path, '')
            restriction = re.sub('<PA>(?P<content>[^<]*)</PA>',
                                 get_pa_tag_content,
                                 restriction)

            ## Go through all the files in curdir/files/path
            for current_file in os.listdir("%s/files/%s" % (curdir, path)):
                ## retrieve filename and extension
                dummy, filename, extension = decompose_file(current_file)
                if extension and extension[0] != ".":
                    extension = '.' + extension
                if len(paths_and_suffixes[path]) != 0:
                    extension = "_%s%s" % (paths_and_suffixes[path], extension)
                ## Build the new file name if rename parameter has been given
                if rename:
                    filename = re.sub('<PA>(?P<content>[^<]*)</PA>', \
                                      get_pa_tag_content, \
                                      parameters['rename'])

                if rename or len(paths_and_suffixes[path]) != 0 :
                    ## Rename the file
                    try:
                        # Write the log rename_cmd
                        fd = open("%s/rename_cmd" % curdir, "a+")
                        fd.write("%s/files/%s/%s" % (curdir, path, current_file) + " to " +\
                                  "%s/files/%s/%s%s" % (curdir, path, filename, extension) + "\n\n")
                        ## Rename
                        os.rename("%s/files/%s/%s" % (curdir, path, current_file), \
                                  "%s/files/%s/%s%s" % (curdir, path, filename, extension))

                        fd.close()
                        ## Save the new name in a text file in curdir so that
                        ## the new filename can be used by templates to created the recmysl
                        fd = open("%s/%s_RENAMED" % (curdir, path), "w")
                        fd.write("%s%s" % (filename, extension))
                        fd.close()
                    except OSError as err:
                        msg = "Cannot rename the file.[%s]"
                        msg %= str(err)
                        raise InvenioWebSubmitFunctionWarning(msg)
                fullpath = "%s/files/%s/%s%s" % (curdir, path, filename, extension)
                ## Check if there is any existing similar file
                if not bibrecdocs.check_file_exists(fullpath, extension):
                    bibdoc = bibrecdocs.add_new_file(fullpath, doctype=paths_and_doctypes.get(path, path), never_fail=True)
                    bibdoc.set_status(restriction)
                    ## Fulltext
                    if documenttype == "fulltext":
                        additionalformats = createRelatedFormats(fullpath)
                        if len(additionalformats) > 0:
                            for additionalformat in additionalformats:
                                try:
                                    bibrecdocs.add_new_format(additionalformat)
                                except InvenioBibDocFileError:
                                    pass
                    ## Icon
                    elif documenttype == "picture":
                        has_added_default_icon_subformat_p = False
                        for iconsize in iconsizes:
                            try:
                                iconpath, iconname = create_icon({
                                    'input-file' : fullpath,
                                    'icon-scale' : iconsize,
                                    'icon-name' : None,
                                    'icon-file-format' : None,
                                    'multipage-icon' : False,
                                    'multipage-icon-delay' : 100,
                                    'verbosity' : 0,
                                })
                            except Exception as e:
                                register_exception(prefix='Impossible to create icon for %s (record %s)' % (fullpath, sysno), alert_admin=True)
                                continue
                            iconpath = os.path.join(iconpath, iconname)
                            docname = decompose_file(fullpath)[1]
                            try:
                                mybibdoc = bibrecdocs.get_bibdoc(docname)
                            except InvenioBibDocFileError:
                                mybibdoc = None
                            if iconpath is not None and mybibdoc is not None:
                                try:
                                    icon_suffix = iconsize.replace('>', '').replace('<', '').replace('^', '').replace('!', '')
                                    if not has_added_default_icon_subformat_p:
                                        mybibdoc.add_icon(iconpath)
                                        has_added_default_icon_subformat_p = True
                                    else:
                                        mybibdoc.add_icon(iconpath, subformat=CFG_BIBDOCFILE_DEFAULT_ICON_SUBFORMAT + "-" + icon_suffix)
                                    ## Save the new icon filename in a text file in curdir so that
                                    ## it can be used by templates to created the recmysl
                                    try:
                                        if not has_added_default_icon_subformat_p:
                                            fd = open("%s/%s_ICON" % (curdir, path), "w")
                                        else:
                                            fd = open("%s/%s_ICON_%s" % (curdir, path, iconsize + '_' + icon_suffix), "w")
                                        fd.write(os.path.basename(iconpath))
                                        fd.close()
                                    except OSError as err:
                                        msg = "Cannot store icon filename.[%s]"
                                        msg %= str(err)
                                        raise InvenioWebSubmitFunctionWarning(msg)
                                except InvenioBibDocFileError as e:
                                    # Most probably icon already existed.
                                    pass
                            elif mybibdoc is not None:
                                mybibdoc.delete_icon()

    # Update the MARC
    bibdocfile_bin = os.path.join(CFG_BINDIR, 'bibdocfile --yes-i-know')
    run_shell_command(bibdocfile_bin + " --fix-marc --recid=%s", (str(sysno),))

    # Delete the HB BibFormat cache in the DB, so that the fulltext
    # links do not point to possible dead files
    run_sql("DELETE LOW_PRIORITY from bibfmt WHERE format='HB' AND id_bibrec=%s", (sysno,))

    return ""

Example #27

0

Show file

def handle_file_post(req, allowed_mimetypes=None):
    """
    Handle the POST of a file.
    @return: the a tuple with the full path to the file saved on disk,
    and it's mimetype as provided by the request.
    @rtype: (string, string)
    """
    from invenio.legacy.bibdocfile.api import decompose_file, md5
    ## We retrieve the length
    clen = req.headers_in["Content-Length"]
    if clen is None:
        raise InvenioWebInterfaceWSGIContentLenghtError("Content-Length header is missing")
    try:
        clen = int(clen)
        assert (clen > 1)
    except (ValueError, AssertionError):
        raise InvenioWebInterfaceWSGIContentLenghtError("Content-Length header should contain a positive integer")
    ## Let's take the content type
    ctype = req.headers_in["Content-Type"]
    if allowed_mimetypes and ctype not in allowed_mimetypes:
        raise InvenioWebInterfaceWSGIContentTypeError("Content-Type not in allowed list of content types: %s" % allowed_mimetypes)
    ## Let's optionally accept a suggested filename
    suffix = prefix = ''
    g = RE_CDISPOSITION_FILENAME.search(req.headers_in.get("Content-Disposition", ""))
    if g:
        dummy, prefix, suffix = decompose_file(g.group("filename"))
    ## Let's optionally accept an MD5 hash (and use it later for comparison)
    cmd5 = req.headers_in.get("Content-MD5")
    if cmd5:
        the_md5 = md5()

    ## Ok. We can initialize the file
    fd, path = tempfile.mkstemp(suffix=suffix, prefix=prefix, dir=CFG_TMPDIR)
    the_file = os.fdopen(fd, 'w')
    ## Let's read the file
    while True:
        chunk = req.read(min(10240, clen))
        if len(chunk) < min(10240, clen):
            ## We expected to read at least clen (which is different than 0)
            ## but chunk was shorter! Gosh! Error! Panic!
            the_file.close()
            os.close(fd)
            os.remove(path)
            raise InvenioWebInterfaceWSGIContentLenghtError("File shorter than what specified in Content-Length")
        if cmd5:
            ## MD5 was in the header let's compute it
            the_md5.update(chunk)
        ## And let's definitively write the content to disk :-)
        the_file.write(chunk)
        clen -= len(chunk)
        if clen == 0:
            ## That's it. Everything was read.
            break
    if cmd5 and the_md5.hexdigest().lower() != cmd5.strip().lower():
        ## Let's check the MD5
        the_file.close()
        os.close(fd)
        os.remove(path)
        raise InvenioWebInterfaceWSGIContentMD5Error("MD5 checksum does not match")
    ## Let's clean everything up
    the_file.close()
    return (path, ctype)

Example #28

0

Show file

File: Shared_Functions.py Project: SCOAP3/invenio

def createRelatedFormats(fullpath, overwrite=True, debug=False, consider_version=False):
    """Given a fullpath, this function extracts the file's extension and
    finds in which additional format the file can be converted and converts it.
    @param fullpath: (string) complete path to file
    @param overwrite: (bool) overwrite already existing formats
    @param consider_version: (bool) if True, consider the version info
                             in C{fullpath} to find missing format
                             for that specific version, if C{fullpath}
                             contains version info
    Return a list of the paths to the converted files
    """
    file_converter_logger = get_file_converter_logger()
    old_logging_level = file_converter_logger.getEffectiveLevel()
    if debug:
        file_converter_logger.setLevel(DEBUG)
    try:
        createdpaths = []
        if consider_version:
            try:
                basedir, filename, extension, version = decompose_file_with_version(fullpath)
            except:
                basedir, filename, extension = decompose_file(fullpath)
                version = 0
        else:
            basedir, filename, extension = decompose_file(fullpath)
            version = 0
        extension = extension.lower()
        if debug:
            print("basedir: %s, filename: %s, extension: %s" % (basedir, filename, extension), file=sys.stderr)

        if overwrite:
            missing_formats = get_missing_formats([fullpath])
        else:
            if version:
                filelist = glob.glob(os.path.join(basedir, '%s*;%s' % (filename, version)))
            else:
                filelist = glob.glob(os.path.join(basedir, '%s*' % filename))
            if debug:
                print("filelist: %s" % filelist, file=sys.stderr)
            missing_formats = get_missing_formats(filelist)
        if debug:
            print("missing_formats: %s" % missing_formats, file=sys.stderr)
        for path, formats in iteritems(missing_formats):
            if debug:
                print("... path: %s, formats: %s" % (path, formats), file=sys.stderr)
            for aformat in formats:
                if debug:
                    print("...... aformat: %s" % aformat, file=sys.stderr)
                newpath = os.path.join(basedir, filename + aformat)
                if debug:
                    print("...... newpath: %s" % newpath, file=sys.stderr)
                try:
                    if CFG_BIBDOCFILE_FILEDIR in basedir:
                        # We should create the new files in a temporary location, not
                        # directly inside the BibDoc directory.
                        newpath = convert_file(path, output_format=aformat)
                    else:
                        convert_file(path, newpath)
                    createdpaths.append(newpath)
                except InvenioWebSubmitFileConverterError as msg:
                    if debug:
                        print("...... Exception: %s" % msg, file=sys.stderr)
                    register_exception(alert_admin=True)
    finally:
        if debug:
            file_converter_logger.setLevel(old_logging_level)
    return createdpaths

Example #29

0

Show file

File: webinterface.py Project: chokribr/invenio-1

    def add(self, req, form):
        """
        Add a comment (review) to record with id recid where recid>0
        Also works for adding a remark to basket with id recid where recid<-99
        @param ln: languange
        @param recid: record id
        @param action:  'DISPLAY' to display add form
                        'SUBMIT' to submit comment once form is filled
                        'REPLY' to reply to an already existing comment
        @param msg: the body of the comment/review or remark
        @param score: star score of the review
        @param note: title of the review
        @param comid: comment id, needed for replying
        @param editor_type: the type of editor used for submitting the
                            comment: 'textarea', 'ckeditor'.
        @param subscribe: if set, subscribe user to receive email
                          notifications when new comment are added to
                          this discussion
        @return the full html page.
        """
        argd = wash_urlargd(
            form, {
                'action': (str, "DISPLAY"),
                'msg': (str, ""),
                'note': (str, ''),
                'score': (int, 0),
                'comid': (int, 0),
                'editor_type': (str, ""),
                'subscribe': (str, ""),
                'cookie': (str, "")
            })
        _ = gettext_set_language(argd['ln'])

        actions = ['DISPLAY', 'REPLY', 'SUBMIT']
        uid = getUid(req)

        # Is site ready to accept comments?
        if uid == -1 or (not CFG_WEBCOMMENT_ALLOW_COMMENTS
                         and not CFG_WEBCOMMENT_ALLOW_REVIEWS):
            return page_not_authorized(req,
                                       "../comments/add",
                                       navmenuid='search')

        # Is user allowed to post comment?
        user_info = collect_user_info(req)
        (auth_code_1,
         auth_msg_1) = check_user_can_view_comments(user_info, self.recid)
        (auth_code_2,
         auth_msg_2) = check_user_can_send_comments(user_info, self.recid)
        if isGuestUser(uid):
            cookie = mail_cookie_create_authorize_action(
                VIEWRESTRCOLL, {
                    'collection': guess_primary_collection_of_a_record(
                        self.recid)
                })
            # Save user's value in cookie, so that these "POST"
            # parameters are not lost during login process
            msg_cookie = mail_cookie_create_common(
                'comment_msg', {
                    'msg': argd['msg'],
                    'note': argd['note'],
                    'score': argd['score'],
                    'editor_type': argd['editor_type'],
                    'subscribe': argd['subscribe']
                },
                onetime=True)
            target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
                make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : \
                CFG_SITE_SECURE_URL + user_info['uri'] + '&cookie=' + msg_cookie}, {})
            return redirect_to_url(req, target, norobot=True)
        elif (auth_code_1 or auth_code_2):
            return page_not_authorized(req, "../", \
                text = auth_msg_1 + auth_msg_2)

        if argd['comid']:
            # If replying to a comment, are we on a record that
            # matches the original comment user is replying to?
            if not check_comment_belongs_to_record(argd['comid'], self.recid):
                return page_not_authorized(req, "../", \
                                           text = _("Specified comment does not belong to this record"))

            # Is user trying to reply to a restricted comment? Make
            # sure user has access to it.  We will then inherit its
            # restriction for the new comment
            (auth_code,
             auth_msg) = check_user_can_view_comment(user_info, argd['comid'])
            if auth_code:
                return page_not_authorized(req, "../", \
                                           text = _("You do not have access to the specified comment"))

            # Is user trying to reply to a deleted comment? If so, we
            # let submitted comment go (to not lose possibly submitted
            # content, if comment is submitted while original is
            # deleted), but we "reset" comid to make sure that for
            # action 'REPLY' the original comment is not included in
            # the reply
            if is_comment_deleted(argd['comid']):
                argd['comid'] = 0

        user_info = collect_user_info(req)
        can_attach_files = False
        (auth_code, auth_msg) = check_user_can_attach_file_to_comments(
            user_info, self.recid)
        if not auth_code and (user_info['email'] != 'guest'):
            can_attach_files = True

        warning_msgs = [
        ]  # list of warning tuples (warning_text, warning_color)
        added_files = {}
        if can_attach_files:
            # User is allowed to attach files. Process the files
            file_too_big = False
            formfields = form.get('commentattachment[]', [])
            if not hasattr(formfields,
                           "__getitem__"):  # A single file was uploaded
                formfields = [formfields]
            for formfield in formfields[:CFG_WEBCOMMENT_MAX_ATTACHED_FILES]:
                if hasattr(formfield, "filename") and formfield.filename:
                    filename = formfield.filename
                    dir_to_open = os.path.join(CFG_TMPSHAREDDIR, 'webcomment',
                                               str(uid))
                    try:
                        assert (dir_to_open.startswith(CFG_TMPSHAREDDIR))
                    except AssertionError:
                        register_exception(req=req,
                                           prefix='User #%s tried to upload file to forbidden location: %s' \
                                           % (uid, dir_to_open))

                    if not os.path.exists(dir_to_open):
                        try:
                            os.makedirs(dir_to_open)
                        except:
                            register_exception(req=req, alert_admin=True)

                    ## Before saving the file to disc, wash the filename
                    ## (in particular washing away UNIX and Windows
                    ## (e.g. DFS) paths):
                    filename = os.path.basename(filename.split('\\')[-1])
                    filename = filename.strip()
                    if filename != "":
                        # Check that file does not already exist
                        while os.path.exists(
                                os.path.join(dir_to_open, filename)):
                            basedir, name, extension = decompose_file(filename)
                            new_name = propose_next_docname(name)
                            filename = new_name + extension

                        fp = open(os.path.join(dir_to_open, filename), "w")
                        # FIXME: temporary, waiting for wsgi handler to be
                        # fixed. Once done, read chunk by chunk
                        #                         while formfield.file:
                        #                             fp.write(formfield.file.read(10240))
                        fp.write(formfield.file.read())
                        fp.close()
                        # Isn't this file too big?
                        file_size = os.path.getsize(
                            os.path.join(dir_to_open, filename))
                        if CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE > 0 and \
                               file_size > CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE:
                            os.remove(os.path.join(dir_to_open, filename))
                            # One file is too big: record that,
                            # dismiss all uploaded files and re-ask to
                            # upload again
                            file_too_big = True
                            try:
                                raise InvenioWebCommentWarning(
                                    _('The size of file \\"%(x_file)s\\" (%(x_size)s) is larger than maximum allowed file size (%(x_max)s). Select files again.',
                                      x_file=cgi.escape(filename),
                                      x_size=str(file_size / 1024) + 'KB',
                                      x_max=str(
                                          CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE /
                                          1024) + 'KB'))
                            except InvenioWebCommentWarning as exc:
                                register_exception(stream='warning')
                                warning_msgs.append((exc.message, ''))
                            #warning_msgs.append(('WRN_WEBCOMMENT_MAX_FILE_SIZE_REACHED', cgi.escape(filename), str(file_size/1024) + 'KB', str(CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE/1024) + 'KB'))
                        else:
                            added_files[filename] = os.path.join(
                                dir_to_open, filename)

            if file_too_big:
                # One file was too big. Removed all uploaded filed
                for filepath in added_files.items():
                    try:
                        os.remove(filepath)
                    except:
                        # File was already removed or does not exist?
                        pass

        client_ip_address = req.remote_ip
        check_warnings = []
        (ok, problem) = check_recID_is_in_range(self.recid, check_warnings,
                                                argd['ln'])
        if ok:
            title, description, keywords = websearch_templates.tmpl_record_page_header_content(
                req, self.recid, argd['ln'])
            navtrail = create_navtrail_links(
                cc=guess_primary_collection_of_a_record(self.recid))
            if navtrail:
                navtrail += ' &gt; '
            navtrail += '<a class="navtrail" href="%s/%s/%s?ln=%s">' % (
                CFG_SITE_URL, CFG_SITE_RECORD, self.recid, argd['ln'])
            navtrail += cgi.escape(title)
            navtrail += '</a>'
            navtrail += '&gt; <a class="navtrail" href="%s/%s/%s/%s/?ln=%s">%s</a>' % (
                CFG_SITE_URL, CFG_SITE_RECORD, self.recid,
                self.discussion == 1 and 'reviews' or 'comments', argd['ln'],
                self.discussion == 1 and _('Reviews') or _('Comments'))

            if argd['action'] not in actions:
                argd['action'] = 'DISPLAY'

            if not argd['msg']:
                # User had to login in-between, so retrieve msg
                # from cookie
                try:
                    (kind,
                     cookie_argd) = mail_cookie_check_common(argd['cookie'],
                                                             delete=True)

                    argd.update(cookie_argd)
                except InvenioWebAccessMailCookieDeletedError:
                    return redirect_to_url(req, CFG_SITE_SECURE_URL + '/'+ CFG_SITE_RECORD +'/' + \
                                           str(self.recid) + (self.discussion==1 and \
                                                              '/reviews' or '/comments'))
                except InvenioWebAccessMailCookieError:
                    # Invalid or empty cookie: continue
                    pass

            subscribe = False
            if argd['subscribe'] and \
               get_user_subscription_to_discussion(self.recid, uid) == 0:
                # User is not already subscribed, and asked to subscribe
                subscribe = True

            body = perform_request_add_comment_or_remark(
                recID=self.recid,
                ln=argd['ln'],
                uid=uid,
                action=argd['action'],
                msg=argd['msg'],
                note=argd['note'],
                score=argd['score'],
                reviews=self.discussion,
                comID=argd['comid'],
                client_ip_address=client_ip_address,
                editor_type=argd['editor_type'],
                can_attach_files=can_attach_files,
                subscribe=subscribe,
                req=req,
                attached_files=added_files,
                warnings=warning_msgs)

            if self.discussion:
                title = _("Add Review")
            else:
                title = _("Add Comment")

            jqueryheader = '''
            <script src="%(CFG_SITE_URL)s/vendors/jquery-multifile/jquery.MultiFile.pack.js" type="text/javascript"></script>
            ''' % {
                'CFG_SITE_URL': CFG_SITE_URL
            }

            return page(title=title,
                        body=body,
                        navtrail=navtrail,
                        uid=uid,
                        language=CFG_SITE_LANG,
                        verbose=1,
                        req=req,
                        navmenuid='search',
                        metaheaderadd=jqueryheader)
        # id not in range
        else:
            return page(title=_("Record Not Found"),
                        body=problem,
                        uid=uid,
                        verbose=1,
                        req=req,
                        navmenuid='search')

Example #30

0

Show file

File: connector.py Project: dset0x/invenio

def process_CKEditor_upload(form, uid, user_files_path, user_files_absolute_path,
                            recid=None, allowed_types=default_allowed_types):
    """
    Process a file upload request.

    @param form: the form as in req object.
    @type form: dict
    @param uid: the user ID of the user uploading the file.
    @type uid: int
    @param user_files_path: the base URL where the file can be
        accessed from the web after upload.
        Note that you have to implement your own handler to stream the files from the directory
        C{user_files_absolute_path} if you set this value.
    @type user_files_path: string
    @param user_files_absolute_path: the base path on the server where
        the files should be saved.
        Eg:C{%(CFG_DATADIR)s/comments/%(recid)s/%(uid)s}
    @type user_files_absolute_path: string
    @param recid: the record ID for which we upload a file. Leave None if not relevant.
    @type recid: int
    @param allowed_types: types allowed for uploading. These
        are supported by CKEditor: ['File', 'Image', 'Flash', 'Media']
    @type allowed_types: list of strings
    @return: (msg, uploaded_file_path, uploaded_file_name, uploaded_file_url, callback_function)
    """
    msg = ''
    filename = ''
    formfile = None
    uploaded_file_path = ''
    user_files_path = ''

    for key, formfields in form.items():
        if key != 'upload':
            continue
        if hasattr(formfields, "filename") and formfields.filename:
            # We have found our file
            filename = formfields.filename
            formfile = formfields.file
            break

    can_upload_file_p = False
    if not form['type'] in allowed_types:
        # Is the type sent through the form ok?
        msg = 'You are not allowed to upload a file of this type'
    else:
        # Is user allowed to upload such file extension?
        basedir, name, extension = decompose_file(filename)
        extension = extension[1:] # strip leading dot
        if extension in allowed_extensions.get(form['type'], []):
            can_upload_file_p = True

    if not can_upload_file_p:
        msg = 'You are not allowed to upload a file of this type'
    elif filename and formfile:
        ## Before saving the file to disk, wash the filename (in particular
        ## washing away UNIX and Windows (e.g. DFS) paths):
        filename = os.path.basename(filename.split('\\')[-1])
        # Remove \ / | : ? *
	filename = re.sub ( '\\\\|\\/|\\||\\:|\\?|\\*|"|<|>|[\x00-\x1f\x7f-\x9f]/', '_', filename)
        filename = filename.strip()
        if filename != "":
            # Check that file does not already exist
            n = 1
            while os.path.exists(os.path.join(user_files_absolute_path, filename)):
                basedir, name, extension = decompose_file(filename)
                new_name = propose_next_docname(name)
                filename = new_name + extension

            # This may be dangerous if the file size is bigger than the available memory
            fp = open(os.path.join(user_files_absolute_path, filename), "w")
            fp.write(formfile.read())
            fp.close()

            uploaded_file_path = os.path.join(user_files_absolute_path, filename)
            uploaded_file_name = filename

    return (msg, uploaded_file_path, filename, user_files_path, form['CKEditorFuncNum'])

Example #31

0

Show file

File: Shared_Functions.py Project: chokribr/invenio-1

def createRelatedFormats(fullpath,
                         overwrite=True,
                         debug=False,
                         consider_version=False):
    """Given a fullpath, this function extracts the file's extension and
    finds in which additional format the file can be converted and converts it.
    @param fullpath: (string) complete path to file
    @param overwrite: (bool) overwrite already existing formats
    @param consider_version: (bool) if True, consider the version info
                             in C{fullpath} to find missing format
                             for that specific version, if C{fullpath}
                             contains version info
    Return a list of the paths to the converted files
    """
    file_converter_logger = get_file_converter_logger()
    old_logging_level = file_converter_logger.getEffectiveLevel()
    if debug:
        file_converter_logger.setLevel(DEBUG)
    try:
        createdpaths = []
        if consider_version:
            try:
                basedir, filename, extension, version = decompose_file_with_version(
                    fullpath)
            except:
                basedir, filename, extension = decompose_file(fullpath)
                version = 0
        else:
            basedir, filename, extension = decompose_file(fullpath)
            version = 0
        extension = extension.lower()
        if debug:
            print("basedir: %s, filename: %s, extension: %s" %
                  (basedir, filename, extension),
                  file=sys.stderr)

        if overwrite:
            missing_formats = get_missing_formats([fullpath])
        else:
            if version:
                filelist = glob.glob(
                    os.path.join(basedir, '%s*;%s' % (filename, version)))
            else:
                filelist = glob.glob(os.path.join(basedir, '%s*' % filename))
            if debug:
                print("filelist: %s" % filelist, file=sys.stderr)
            missing_formats = get_missing_formats(filelist)
        if debug:
            print("missing_formats: %s" % missing_formats, file=sys.stderr)
        for path, formats in iteritems(missing_formats):
            if debug:
                print("... path: %s, formats: %s" % (path, formats),
                      file=sys.stderr)
            for aformat in formats:
                if debug:
                    print("...... aformat: %s" % aformat, file=sys.stderr)
                newpath = os.path.join(basedir, filename + aformat)
                if debug:
                    print("...... newpath: %s" % newpath, file=sys.stderr)
                try:
                    if CFG_BIBDOCFILE_FILEDIR in basedir:
                        # We should create the new files in a temporary location, not
                        # directly inside the BibDoc directory.
                        newpath = convert_file(path, output_format=aformat)
                    else:
                        convert_file(path, newpath)
                    createdpaths.append(newpath)
                except InvenioWebSubmitFileConverterError as msg:
                    if debug:
                        print("...... Exception: %s" % msg, file=sys.stderr)
                    register_exception(alert_admin=True)
    finally:
        if debug:
            file_converter_logger.setLevel(old_logging_level)
    return createdpaths

Example #32

0

Show file

def Move_Files_to_Storage(parameters, curdir, form, user_info=None):
    """
    The function moves files received from the standard submission's
    form through file input element(s). The document are assigned a
    'doctype' (or category) corresponding to the file input element
    (eg. a file uploaded throught 'DEMOPIC_FILE' will go to
    'DEMOPIC_FILE' doctype/category).

    Websubmit engine builds the following file organization in the
    directory curdir/files:

                  curdir/files
                        |
      _____________________________________________________________________
            |                                   |                          |
      ./file input 1 element's name      ./file input 2 element's name    ....
         (for eg. 'DEMOART_MAILFILE')       (for eg. 'DEMOART_APPENDIX')
         |                                     |
      test1.pdf                             test2.pdf


    There is only one instance of all possible extension(pdf, gz...) in each part
    otherwise we may encounter problems when renaming files.

    + parameters['rename']: if given, all the files in curdir/files
      are renamed.  parameters['rename'] is of the form:
      <PA>elemfilename[re]</PA>* where re is an regexp to select(using
      re.sub) what part of the elem file has to be selected.
      e.g: <PA>file:TEST_FILE_RN</PA>

    + parameters['documenttype']: if given, other formats are created.
      It has 2 possible values: - if "picture" icon in gif format is created
                                - if "fulltext" ps, gz .... formats are created

    + parameters['paths_and_suffixes']: directories to look into and
      corresponding suffix to add to every file inside. It must have
      the same structure as a Python dictionnary of the following form
      {'FrenchAbstract':'french', 'EnglishAbstract':''}

      The keys are the file input element name from the form <=>
      directories in curdir/files The values associated are the
      suffixes which will be added to all the files in
      e.g. curdir/files/FrenchAbstract

    + parameters['iconsize'] need only if 'icon' is selected in
      parameters['documenttype']

    + parameters['paths_and_restrictions']: the restrictions to apply
      to each uploaded file. The parameter must have the same
      structure as a Python dictionnary of the following form:
      {'DEMOART_APPENDIX':'restricted'}
      Files not specified in this parameter are not restricted.
      The specified restrictions can include a variable that can be
      replaced at runtime, for eg:
      {'DEMOART_APPENDIX':'restricted to <PA>file:SuE</PA>'}

    + parameters['paths_and_doctypes']: if a doctype is specified,
      the file will be saved under the 'doctype/collection' instead
      of under the default doctype/collection given by the name
      of the upload element that was used on the websubmit interface.
      to configure the doctype in websubmit, enter the value as in a
      dictionnary, for eg:
      {'PATHS_SWORD_UPL' : 'PUSHED_TO_ARXIV'} -> from
      Demo_Export_Via_Sword [DEMOSWR] Document Types
    """

    global sysno
    paths_and_suffixes = parameters['paths_and_suffixes']
    paths_and_restrictions = parameters['paths_and_restrictions']
    rename = parameters['rename']
    documenttype = parameters['documenttype']
    iconsizes = parameters['iconsize'].split(',')
    paths_and_doctypes = parameters['paths_and_doctypes']

    ## Create an instance of BibRecDocs for the current recid(sysno)
    bibrecdocs = BibRecDocs(sysno)

    paths_and_suffixes = get_dictionary_from_string(paths_and_suffixes)

    paths_and_restrictions = get_dictionary_from_string(paths_and_restrictions)

    paths_and_doctypes = get_dictionary_from_string(paths_and_doctypes)

    ## Go through all the directories specified in the keys
    ## of parameters['paths_and_suffixes']
    for path in paths_and_suffixes.keys():
        ## Check if there is a directory for the current path
        if os.path.exists("%s/files/%s" % (curdir, path)):
            ## Retrieve the restriction to apply to files in this
            ## directory
            restriction = paths_and_restrictions.get(path, '')
            restriction = re.sub('<PA>(?P<content>[^<]*)</PA>',
                                 get_pa_tag_content, restriction)

            ## Go through all the files in curdir/files/path
            for current_file in os.listdir("%s/files/%s" % (curdir, path)):
                ## retrieve filename and extension
                dummy, filename, extension = decompose_file(current_file)
                if extension and extension[0] != ".":
                    extension = '.' + extension
                if len(paths_and_suffixes[path]) != 0:
                    extension = "_%s%s" % (paths_and_suffixes[path], extension)
                ## Build the new file name if rename parameter has been given
                if rename:
                    filename = re.sub('<PA>(?P<content>[^<]*)</PA>', \
                                      get_pa_tag_content, \
                                      parameters['rename'])

                if rename or len(paths_and_suffixes[path]) != 0:
                    ## Rename the file
                    try:
                        # Write the log rename_cmd
                        fd = open("%s/rename_cmd" % curdir, "a+")
                        fd.write("%s/files/%s/%s" % (curdir, path, current_file) + " to " +\
                                  "%s/files/%s/%s%s" % (curdir, path, filename, extension) + "\n\n")
                        ## Rename
                        os.rename("%s/files/%s/%s" % (curdir, path, current_file), \
                                  "%s/files/%s/%s%s" % (curdir, path, filename, extension))

                        fd.close()
                        ## Save the new name in a text file in curdir so that
                        ## the new filename can be used by templates to created the recmysl
                        fd = open("%s/%s_RENAMED" % (curdir, path), "w")
                        fd.write("%s%s" % (filename, extension))
                        fd.close()
                    except OSError as err:
                        msg = "Cannot rename the file.[%s]"
                        msg %= str(err)
                        raise InvenioWebSubmitFunctionWarning(msg)
                fullpath = "%s/files/%s/%s%s" % (curdir, path, filename,
                                                 extension)
                ## Check if there is any existing similar file
                if not bibrecdocs.check_file_exists(fullpath, extension):
                    bibdoc = bibrecdocs.add_new_file(
                        fullpath,
                        doctype=paths_and_doctypes.get(path, path),
                        never_fail=True)
                    bibdoc.set_status(restriction)
                    ## Fulltext
                    if documenttype == "fulltext":
                        additionalformats = createRelatedFormats(fullpath)
                        if len(additionalformats) > 0:
                            for additionalformat in additionalformats:
                                try:
                                    bibrecdocs.add_new_format(additionalformat)
                                except InvenioBibDocFileError:
                                    pass
                    ## Icon
                    elif documenttype == "picture":
                        has_added_default_icon_subformat_p = False
                        for iconsize in iconsizes:
                            try:
                                iconpath, iconname = create_icon({
                                    'input-file':
                                    fullpath,
                                    'icon-scale':
                                    iconsize,
                                    'icon-name':
                                    None,
                                    'icon-file-format':
                                    None,
                                    'multipage-icon':
                                    False,
                                    'multipage-icon-delay':
                                    100,
                                    'verbosity':
                                    0,
                                })
                            except Exception as e:
                                register_exception(
                                    prefix=
                                    'Impossible to create icon for %s (record %s)'
                                    % (fullpath, sysno),
                                    alert_admin=True)
                                continue
                            iconpath = os.path.join(iconpath, iconname)
                            docname = decompose_file(fullpath)[1]
                            try:
                                mybibdoc = bibrecdocs.get_bibdoc(docname)
                            except InvenioBibDocFileError:
                                mybibdoc = None
                            if iconpath is not None and mybibdoc is not None:
                                try:
                                    icon_suffix = iconsize.replace(
                                        '>', '').replace('<', '').replace(
                                            '^', '').replace('!', '')
                                    if not has_added_default_icon_subformat_p:
                                        mybibdoc.add_icon(iconpath)
                                        has_added_default_icon_subformat_p = True
                                    else:
                                        mybibdoc.add_icon(
                                            iconpath,
                                            subformat=
                                            CFG_BIBDOCFILE_DEFAULT_ICON_SUBFORMAT
                                            + "-" + icon_suffix)
                                    ## Save the new icon filename in a text file in curdir so that
                                    ## it can be used by templates to created the recmysl
                                    try:
                                        if not has_added_default_icon_subformat_p:
                                            fd = open(
                                                "%s/%s_ICON" % (curdir, path),
                                                "w")
                                        else:
                                            fd = open(
                                                "%s/%s_ICON_%s" %
                                                (curdir, path,
                                                 iconsize + '_' + icon_suffix),
                                                "w")
                                        fd.write(os.path.basename(iconpath))
                                        fd.close()
                                    except OSError as err:
                                        msg = "Cannot store icon filename.[%s]"
                                        msg %= str(err)
                                        raise InvenioWebSubmitFunctionWarning(
                                            msg)
                                except InvenioBibDocFileError as e:
                                    # Most probably icon already existed.
                                    pass
                            elif mybibdoc is not None:
                                mybibdoc.delete_icon()

    # Update the MARC
    bibdocfile_bin = os.path.join(CFG_BINDIR, 'bibdocfile --yes-i-know')
    run_shell_command(bibdocfile_bin + " --fix-marc --recid=%s",
                      (str(sysno), ))

    # Delete the HB BibFormat cache in the DB, so that the fulltext
    # links do not point to possible dead files
    run_sql(
        "DELETE LOW_PRIORITY from bibfmt WHERE format='HB' AND id_bibrec=%s",
        (sysno, ))

    return ""

Example #33

0

Show file

File: webinterface.py Project: mhellmic/b2share

    def uploadfile(self, req, form):
        """
        Similar to /submit, but only consider files. Nice for
        asynchronous Javascript uploads. Should be used to upload a
        single file.

        Also try to create an icon, and return URL to file(s) + icon(s)

        Authentication is performed based on session ID passed as
        parameter instead of cookie-based authentication, due to the
        use of this URL by the Flash plugin (to upload multiple files
        at once), which does not route cookies.

        FIXME: consider adding /deletefile and /modifyfile functions +
        parsing of additional parameters to rename files, add
        comments, restrictions, etc.
        """
        argd = wash_urlargd(form, {
            'doctype': (str, ''),
            'access': (str, ''),
            'indir': (str, ''),
            'session_id': (str, ''),
            'rename': (str, ''),
            })

        curdir = None
        if "indir" not in form or \
               "doctype" not in form or \
               "access" not in form:
            raise apache.SERVER_RETURN(apache.HTTP_BAD_REQUEST)
        else:
            curdir = os.path.join(CFG_WEBSUBMIT_STORAGEDIR,
                                  argd['indir'],
                                  argd['doctype'],
                                  argd['access'])

        user_info = collect_user_info(req)
        if "session_id" in form:
            # Are we uploading using Flash, which does not transmit
            # cookie? The expect to receive session_id as a form
            # parameter.  First check that IP addresses do not
            # mismatch.

            uid = session.uid
            user_info = collect_user_info(uid)
            try:
                act_fd = file(os.path.join(curdir, 'act'))
                action = act_fd.read()
                act_fd.close()
            except:
                action = ""

        # Is user authorized to perform this action?
        (auth_code, auth_message) = acc_authorize_action(uid, "submit",
                                                         authorized_if_no_roles=not isGuestUser(uid),
                                                         verbose=0,
                                                         doctype=argd['doctype'],
                                                         act=action)
        if acc_is_role("submit", doctype=argd['doctype'], act=action) and auth_code != 0:
            # User cannot submit
            raise apache.SERVER_RETURN(apache.HTTP_UNAUTHORIZED)
        else:
            # Process the upload and get the response
            added_files = {}
            for key, formfields in form.items():
                filename = key.replace("[]", "")
                file_to_open = os.path.join(curdir, filename)
                if hasattr(formfields, "filename") and formfields.filename:
                    dir_to_open = os.path.abspath(os.path.join(curdir,
                                                               'files',
                                                               str(user_info['uid']),
                                                               key))
                    try:
                        assert(dir_to_open.startswith(CFG_WEBSUBMIT_STORAGEDIR))
                    except AssertionError:
                        register_exception(req=req, prefix='curdir="%s", key="%s"' % (curdir, key))
                        raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN)

                    if not os.path.exists(dir_to_open):
                        try:
                            os.makedirs(dir_to_open)
                        except OSError as e:
                            if e.errno != errno.EEXIST:
                                # If the issue is only that directory
                                # already exists, then continue, else
                                # report
                                register_exception(req=req, alert_admin=True)
                                raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN)

                    filename = formfields.filename
                    ## Before saving the file to disc, wash the filename (in particular
                    ## washing away UNIX and Windows (e.g. DFS) paths):
                    filename = os.path.basename(filename.split('\\')[-1])
                    filename = filename.strip()
                    if filename != "":
                        # Check that file does not already exist
                        n = 1
                        while os.path.exists(os.path.join(dir_to_open, filename)):
                            #dirname, basename, extension = decompose_file(new_destination_path)
                            basedir, name, extension = decompose_file(filename)
                            new_name = propose_next_docname(name)
                            filename = new_name + extension
                        # This may be dangerous if the file size is bigger than the available memory
                        fp = open(os.path.join(dir_to_open, filename), "w")
                        fp.write(formfields.file.read())
                        fp.close()
                        fp = open(os.path.join(curdir, "lastuploadedfile"), "w")
                        fp.write(filename)
                        fp.close()
                        fp = open(file_to_open, "w")
                        fp.write(filename)
                        fp.close()
                        try:
                            # Create icon
                            (icon_path, icon_name) = create_icon(
                                { 'input-file'           : os.path.join(dir_to_open, filename),
                                  'icon-name'            : filename, # extension stripped automatically
                                  'icon-file-format'     : 'gif',
                                  'multipage-icon'       : False,
                                  'multipage-icon-delay' : 100,
                                  'icon-scale'           : "300>", # Resize only if width > 300
                                  'verbosity'            : 0,
                                  })

                            icons_dir = os.path.join(os.path.join(curdir,
                                                                  'icons',
                                                                  str(user_info['uid']),
                                                                  key))
                            if not os.path.exists(icons_dir):
                                # Create uid/icons dir if needed
                                try:
                                    os.makedirs(icons_dir)
                                except OSError as e:
                                    if e.errno != errno.EEXIST:
                                        # If the issue is only that
                                        # directory already exists,
                                        # then continue, else report
                                        register_exception(req=req, alert_admin=True)
                                        raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN)
                            os.rename(os.path.join(icon_path, icon_name),
                                      os.path.join(icons_dir, icon_name))
                            added_files[key] = {'name': filename,
                                                'iconName': icon_name}
                        except InvenioWebSubmitIconCreatorError as e:
                            # We could not create the icon
                            added_files[key] = {'name': filename}
                            continue
                    else:
                        raise apache.SERVER_RETURN(apache.HTTP_BAD_REQUEST)

            # Send our response
            if CFG_JSON_AVAILABLE:
                return json.dumps(added_files)

Example #34

0

Show file

File: pdftk_plugin.py Project: mhellmic/b2share

def write_metadata_local(inputfile, outputfile, metadata_dictionary, verbose):
    """
    Metadata write method, takes the .pdf as input and creates a new
    one with the new info.

    @param inputfile: path to the pdf
    @type inputfile: string
    @param outputfile: path to the resulting pdf
    @type outputfile: string
    @param verbose: verbosity
    @type verbose: int
    @param metadata_dictionary: metadata information to update inputfile
    @type metadata_dictionary: dict
    """
    # Take the file name (0 base, 1 name, 2 ext)
    filename = decompose_file(inputfile)[1]

    # Print pdf metadata
    if verbose > 1:
        print('Metadata information in the PDF file ' + filename + ': \n')
        try:
            os.system(CFG_PATH_PDFTK + ' ' + inputfile + ' dump_data')
        except Exception:
            print('Problem with inputfile to PDFTK')

    # Info file for pdftk
    (fd, path_to_info) = tempfile.mkstemp(prefix="wsm_pdf_plugin_info_", \
                                             dir=CFG_TMPDIR)
    os.close(fd)
    file_in = open(path_to_info, 'w')
    if verbose > 5:
        print("Saving PDFTK info file to %s" % path_to_info)

    # User interaction to form the info file
    # Main Case: Dictionary received through option -d
    if not metadata_dictionary == {}:
        for tag in metadata_dictionary:
            line = 'InfoKey: ' + tag + '\nInfoValue: ' + \
                   metadata_dictionary[tag] + '\n'
            if verbose > 0:
                print(line)
            file_in.writelines(line)
    else:
        data_modified = False
        user_input = 'user_input'
        print("Entering interactive mode. Choose what you want to do:")
        while (user_input):
            if not data_modified:
                try:
                    user_input = raw_input('[w]rite / [q]uit\n')
                except:
                    print("Aborting")
                    return
            else:
                try:
                    user_input = raw_input('[w]rite / [q]uit and apply / [a]bort \n')
                except:
                    print("Aborting")
                    return
            if user_input == 'q':
                if not data_modified:
                    return
                break
            elif user_input == 'w':
                try:
                    tag = raw_input('Tag to update:\n')
                    value = raw_input('With value:\n')
                except:
                    print("Aborting")
                    return
                # Write to info file
                line = 'InfoKey: ' + tag + '\nInfoValue: ' + value + '\n'
                data_modified = True
                file_in.writelines(line)
            elif user_input == 'a':
                return
            else:
                print("Invalid option: ")
    file_in.close()

    (fd, pdf_temp_path) = tempfile.mkstemp(prefix="wsm_pdf_plugin_pdf_", \
                                              dir=CFG_TMPDIR)
    os.close(fd)

    # Now we call pdftk tool to update the info on a pdf
    #try:
    cmd_pdftk = '%s %s update_info %s output %s'
    (exit_status, output_std, output_err) = \
                  run_shell_command(cmd_pdftk,
                                    args=(CFG_PATH_PDFTK, inputfile,
                                          path_to_info, pdf_temp_path))
    if verbose > 5:
        print(output_std, output_err)

    if os.path.exists(pdf_temp_path):
        # Move to final destination if exist
        try:
            shutil.move(pdf_temp_path, outputfile)
        except Exception as err:
            raise InvenioWebSubmitFileMetadataRuntimeError("Could not move %s to %s" % \
                                                           (pdf_temp_path, outputfile))
    else:
        # Something bad happened
        raise InvenioWebSubmitFileMetadataRuntimeError("Could not update metadata " + output_err)

Example #35

0

Show file

File: webinterface.py Project: chokribr/invenio-1

    def uploadfile(self, req, form):
        """
        Similar to /submit, but only consider files. Nice for
        asynchronous Javascript uploads. Should be used to upload a
        single file.

        Also try to create an icon, and return URL to file(s) + icon(s)

        Authentication is performed based on session ID passed as
        parameter instead of cookie-based authentication, due to the
        use of this URL by the Flash plugin (to upload multiple files
        at once), which does not route cookies.

        FIXME: consider adding /deletefile and /modifyfile functions +
        parsing of additional parameters to rename files, add
        comments, restrictions, etc.
        """
        argd = wash_urlargd(
            form, {
                'doctype': (str, ''),
                'access': (str, ''),
                'indir': (str, ''),
                'session_id': (str, ''),
                'rename': (str, ''),
            })

        curdir = None
        if "indir" not in form or \
               "doctype" not in form or \
               "access" not in form:
            raise apache.SERVER_RETURN(apache.HTTP_BAD_REQUEST)
        else:
            curdir = os.path.join(CFG_WEBSUBMIT_STORAGEDIR, argd['indir'],
                                  argd['doctype'], argd['access'])

        user_info = collect_user_info(req)
        if "session_id" in form:
            # Are we uploading using Flash, which does not transmit
            # cookie? The expect to receive session_id as a form
            # parameter.  First check that IP addresses do not
            # mismatch.

            uid = session.uid
            user_info = collect_user_info(uid)
            try:
                act_fd = file(os.path.join(curdir, 'act'))
                action = act_fd.read()
                act_fd.close()
            except:
                action = ""

        try:
            recid_fd = file(os.path.join(curdir, 'SN'))
            recid = recid_fd.read()
            recid_fd.close()
        except:
            recid = ''
        user_is_owner = False
        if recid:
            user_is_owner = is_user_owner_of_record(user_info, recid)

        try:
            categ_fd = file(os.path.join(curdir, 'combo%s' % argd['doctype']))
            categ = categ_fd.read()
            categ_fd.close()
        except IOError:
            categ = '*'

        # Is user authorized to perform this action?
        (auth_code, auth_message) = acc_authorize_action(
            uid,
            "submit",
            authorized_if_no_roles=not isGuestUser(uid),
            verbose=0,
            doctype=argd['doctype'],
            act=action,
            categ=categ)
        if acc_is_role("submit", doctype=argd['doctype'],
                       act=action) and auth_code != 0 and not user_is_owner:
            # User cannot submit
            raise apache.SERVER_RETURN(apache.HTTP_UNAUTHORIZED)
        else:
            # Process the upload and get the response
            added_files = {}
            for key, formfields in form.items():
                filename = key.replace("[]", "")
                file_to_open = os.path.join(curdir, filename)
                if hasattr(formfields, "filename") and formfields.filename:
                    dir_to_open = os.path.abspath(
                        os.path.join(curdir, 'files', str(user_info['uid']),
                                     key))
                    try:
                        assert (
                            dir_to_open.startswith(CFG_WEBSUBMIT_STORAGEDIR))
                    except AssertionError:
                        register_exception(req=req,
                                           prefix='curdir="%s", key="%s"' %
                                           (curdir, key))
                        raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN)

                    if not os.path.exists(dir_to_open):
                        try:
                            os.makedirs(dir_to_open)
                        except OSError as e:
                            if e.errno != errno.EEXIST:
                                # If the issue is only that directory
                                # already exists, then continue, else
                                # report
                                register_exception(req=req, alert_admin=True)
                                raise apache.SERVER_RETURN(
                                    apache.HTTP_FORBIDDEN)

                    filename = formfields.filename
                    ## Before saving the file to disc, wash the filename (in particular
                    ## washing away UNIX and Windows (e.g. DFS) paths):
                    filename = os.path.basename(filename.split('\\')[-1])
                    filename = filename.strip()
                    if filename != "":
                        # Check that file does not already exist
                        n = 1
                        while os.path.exists(
                                os.path.join(dir_to_open, filename)):
                            #dirname, basename, extension = decompose_file(new_destination_path)
                            basedir, name, extension = decompose_file(filename)
                            new_name = propose_next_docname(name)
                            filename = new_name + extension
                        # This may be dangerous if the file size is bigger than the available memory
                        fp = open(os.path.join(dir_to_open, filename), "w")
                        fp.write(formfields.file.read())
                        fp.close()
                        fp = open(os.path.join(curdir, "lastuploadedfile"),
                                  "w")
                        fp.write(filename)
                        fp.close()
                        fp = open(file_to_open, "w")
                        fp.write(filename)
                        fp.close()
                        try:
                            # Create icon
                            (icon_path, icon_name) = create_icon({
                                'input-file':
                                os.path.join(dir_to_open, filename),
                                'icon-name':
                                filename,  # extension stripped automatically
                                'icon-file-format':
                                'gif',
                                'multipage-icon':
                                False,
                                'multipage-icon-delay':
                                100,
                                'icon-scale':
                                "300>",  # Resize only if width > 300
                                'verbosity':
                                0,
                            })

                            icons_dir = os.path.join(
                                os.path.join(curdir, 'icons',
                                             str(user_info['uid']), key))
                            if not os.path.exists(icons_dir):
                                # Create uid/icons dir if needed
                                try:
                                    os.makedirs(icons_dir)
                                except OSError as e:
                                    if e.errno != errno.EEXIST:
                                        # If the issue is only that
                                        # directory already exists,
                                        # then continue, else report
                                        register_exception(req=req,
                                                           alert_admin=True)
                                        raise apache.SERVER_RETURN(
                                            apache.HTTP_FORBIDDEN)
                            os.rename(os.path.join(icon_path, icon_name),
                                      os.path.join(icons_dir, icon_name))
                            added_files[key] = {
                                'name': filename,
                                'iconName': icon_name
                            }
                        except InvenioWebSubmitIconCreatorError as e:
                            # We could not create the icon
                            added_files[key] = {'name': filename}
                            continue
                    else:
                        raise apache.SERVER_RETURN(apache.HTTP_BAD_REQUEST)

            # Send our response
            if CFG_JSON_AVAILABLE:
                return json.dumps(added_files)

Example #36

0

Show file

def write_metadata_local(inputfile, outputfile, metadata_dictionary, verbose):
    """
    Metadata write method, takes the .pdf as input and creates a new
    one with the new info.

    @param inputfile: path to the pdf
    @type inputfile: string
    @param outputfile: path to the resulting pdf
    @type outputfile: string
    @param verbose: verbosity
    @type verbose: int
    @param metadata_dictionary: metadata information to update inputfile
    @type metadata_dictionary: dict
    """
    # Take the file name (0 base, 1 name, 2 ext)
    filename = decompose_file(inputfile)[1]

    # Print pdf metadata
    if verbose > 1:
        print('Metadata information in the PDF file ' + filename + ': \n')
        try:
            os.system(CFG_PATH_PDFTK + ' ' + inputfile + ' dump_data')
        except Exception:
            print('Problem with inputfile to PDFTK')

    # Info file for pdftk
    (fd, path_to_info) = tempfile.mkstemp(prefix="wsm_pdf_plugin_info_", \
                                             dir=CFG_TMPDIR)
    os.close(fd)
    file_in = open(path_to_info, 'w')
    if verbose > 5:
        print("Saving PDFTK info file to %s" % path_to_info)

    # User interaction to form the info file
    # Main Case: Dictionary received through option -d
    if not metadata_dictionary == {}:
        for tag in metadata_dictionary:
            line = 'InfoKey: ' + tag + '\nInfoValue: ' + \
                   metadata_dictionary[tag] + '\n'
            if verbose > 0:
                print(line)
            file_in.writelines(line)
    else:
        data_modified = False
        user_input = 'user_input'
        print("Entering interactive mode. Choose what you want to do:")
        while (user_input):
            if not data_modified:
                try:
                    user_input = raw_input('[w]rite / [q]uit\n')
                except:
                    print("Aborting")
                    return
            else:
                try:
                    user_input = raw_input(
                        '[w]rite / [q]uit and apply / [a]bort \n')
                except:
                    print("Aborting")
                    return
            if user_input == 'q':
                if not data_modified:
                    return
                break
            elif user_input == 'w':
                try:
                    tag = raw_input('Tag to update:\n')
                    value = raw_input('With value:\n')
                except:
                    print("Aborting")
                    return
                # Write to info file
                line = 'InfoKey: ' + tag + '\nInfoValue: ' + value + '\n'
                data_modified = True
                file_in.writelines(line)
            elif user_input == 'a':
                return
            else:
                print("Invalid option: ")
    file_in.close()

    (fd, pdf_temp_path) = tempfile.mkstemp(prefix="wsm_pdf_plugin_pdf_", \
                                              dir=CFG_TMPDIR)
    os.close(fd)

    # Now we call pdftk tool to update the info on a pdf
    #try:
    cmd_pdftk = '%s %s update_info %s output %s'
    (exit_status, output_std, output_err) = \
                  run_shell_command(cmd_pdftk,
                                    args=(CFG_PATH_PDFTK, inputfile,
                                          path_to_info, pdf_temp_path))
    if verbose > 5:
        print(output_std, output_err)

    if os.path.exists(pdf_temp_path):
        # Move to final destination if exist
        try:
            shutil.move(pdf_temp_path, outputfile)
        except Exception as err:
            raise InvenioWebSubmitFileMetadataRuntimeError("Could not move %s to %s" % \
                                                           (pdf_temp_path, outputfile))
    else:
        # Something bad happened
        raise InvenioWebSubmitFileMetadataRuntimeError(
            "Could not update metadata " + output_err)

Example #37

0

Show file

File: webinterface.py Project: jalavik/invenio

    def add(self, req, form):
        """
        Add a comment (review) to record with id recid where recid>0
        Also works for adding a remark to basket with id recid where recid<-99
        @param ln: languange
        @param recid: record id
        @param action:  'DISPLAY' to display add form
                        'SUBMIT' to submit comment once form is filled
                        'REPLY' to reply to an already existing comment
        @param msg: the body of the comment/review or remark
        @param score: star score of the review
        @param note: title of the review
        @param comid: comment id, needed for replying
        @param editor_type: the type of editor used for submitting the
                            comment: 'textarea', 'ckeditor'.
        @param subscribe: if set, subscribe user to receive email
                          notifications when new comment are added to
                          this discussion
        @return the full html page.
        """
        argd = wash_urlargd(
            form,
            {
                "action": (str, "DISPLAY"),
                "msg": (str, ""),
                "note": (str, ""),
                "score": (int, 0),
                "comid": (int, 0),
                "editor_type": (str, ""),
                "subscribe": (str, ""),
                "cookie": (str, ""),
            },
        )
        _ = gettext_set_language(argd["ln"])

        actions = ["DISPLAY", "REPLY", "SUBMIT"]
        uid = getUid(req)

        # Is site ready to accept comments?
        if uid == -1 or (not CFG_WEBCOMMENT_ALLOW_COMMENTS and not CFG_WEBCOMMENT_ALLOW_REVIEWS):
            return page_not_authorized(req, "../comments/add", navmenuid="search")

        # Is user allowed to post comment?
        user_info = collect_user_info(req)
        (auth_code_1, auth_msg_1) = check_user_can_view_comments(user_info, self.recid)
        (auth_code_2, auth_msg_2) = check_user_can_send_comments(user_info, self.recid)
        if isGuestUser(uid):
            cookie = mail_cookie_create_authorize_action(
                VIEWRESTRCOLL, {"collection": guess_primary_collection_of_a_record(self.recid)}
            )
            # Save user's value in cookie, so that these "POST"
            # parameters are not lost during login process
            msg_cookie = mail_cookie_create_common(
                "comment_msg",
                {
                    "msg": argd["msg"],
                    "note": argd["note"],
                    "score": argd["score"],
                    "editor_type": argd["editor_type"],
                    "subscribe": argd["subscribe"],
                },
                onetime=True,
            )
            target = (
                CFG_SITE_SECURE_URL
                + "/youraccount/login"
                + make_canonical_urlargd(
                    {
                        "action": cookie,
                        "ln": argd["ln"],
                        "referer": CFG_SITE_SECURE_URL + user_info["uri"] + "&cookie=" + msg_cookie,
                    },
                    {},
                )
            )
            return redirect_to_url(req, target, norobot=True)
        elif auth_code_1 or auth_code_2:
            return page_not_authorized(req, "../", text=auth_msg_1 + auth_msg_2)

        if argd["comid"]:
            # If replying to a comment, are we on a record that
            # matches the original comment user is replying to?
            if not check_comment_belongs_to_record(argd["comid"], self.recid):
                return page_not_authorized(req, "../", text=_("Specified comment does not belong to this record"))

            # Is user trying to reply to a restricted comment? Make
            # sure user has access to it.  We will then inherit its
            # restriction for the new comment
            (auth_code, auth_msg) = check_user_can_view_comment(user_info, argd["comid"])
            if auth_code:
                return page_not_authorized(req, "../", text=_("You do not have access to the specified comment"))

            # Is user trying to reply to a deleted comment? If so, we
            # let submitted comment go (to not lose possibly submitted
            # content, if comment is submitted while original is
            # deleted), but we "reset" comid to make sure that for
            # action 'REPLY' the original comment is not included in
            # the reply
            if is_comment_deleted(argd["comid"]):
                argd["comid"] = 0

        user_info = collect_user_info(req)
        can_attach_files = False
        (auth_code, auth_msg) = check_user_can_attach_file_to_comments(user_info, self.recid)
        if not auth_code and (user_info["email"] != "guest"):
            can_attach_files = True

        warning_msgs = []  # list of warning tuples (warning_text, warning_color)
        added_files = {}
        if can_attach_files:
            # User is allowed to attach files. Process the files
            file_too_big = False
            formfields = form.get("commentattachment[]", [])
            if not hasattr(formfields, "__getitem__"):  # A single file was uploaded
                formfields = [formfields]
            for formfield in formfields[:CFG_WEBCOMMENT_MAX_ATTACHED_FILES]:
                if hasattr(formfield, "filename") and formfield.filename:
                    filename = formfield.filename
                    dir_to_open = os.path.join(CFG_TMPSHAREDDIR, "webcomment", str(uid))
                    try:
                        assert dir_to_open.startswith(CFG_TMPSHAREDDIR)
                    except AssertionError:
                        register_exception(
                            req=req,
                            prefix="User #%s tried to upload file to forbidden location: %s" % (uid, dir_to_open),
                        )

                    if not os.path.exists(dir_to_open):
                        try:
                            os.makedirs(dir_to_open)
                        except:
                            register_exception(req=req, alert_admin=True)

                    ## Before saving the file to disc, wash the filename
                    ## (in particular washing away UNIX and Windows
                    ## (e.g. DFS) paths):
                    filename = os.path.basename(filename.split("\\")[-1])
                    filename = filename.strip()
                    if filename != "":
                        # Check that file does not already exist
                        while os.path.exists(os.path.join(dir_to_open, filename)):
                            basedir, name, extension = decompose_file(filename)
                            new_name = propose_next_docname(name)
                            filename = new_name + extension

                        fp = open(os.path.join(dir_to_open, filename), "w")
                        # FIXME: temporary, waiting for wsgi handler to be
                        # fixed. Once done, read chunk by chunk
                        #                         while formfield.file:
                        #                             fp.write(formfield.file.read(10240))
                        fp.write(formfield.file.read())
                        fp.close()
                        # Isn't this file too big?
                        file_size = os.path.getsize(os.path.join(dir_to_open, filename))
                        if CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE > 0 and file_size > CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE:
                            os.remove(os.path.join(dir_to_open, filename))
                            # One file is too big: record that,
                            # dismiss all uploaded files and re-ask to
                            # upload again
                            file_too_big = True
                            try:
                                raise InvenioWebCommentWarning(
                                    _(
                                        'The size of file \\"%(x_file)s\\" (%(x_size)s) is larger than maximum allowed file size (%(x_max)s). Select files again.',
                                        x_file=cgi.escape(filename),
                                        x_size=str(file_size / 1024) + "KB",
                                        x_max=str(CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE / 1024) + "KB",
                                    )
                                )
                            except InvenioWebCommentWarning as exc:
                                register_exception(stream="warning")
                                warning_msgs.append((exc.message, ""))
                            # warning_msgs.append(('WRN_WEBCOMMENT_MAX_FILE_SIZE_REACHED', cgi.escape(filename), str(file_size/1024) + 'KB', str(CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE/1024) + 'KB'))
                        else:
                            added_files[filename] = os.path.join(dir_to_open, filename)

            if file_too_big:
                # One file was too big. Removed all uploaded filed
                for filepath in added_files.items():
                    try:
                        os.remove(filepath)
                    except:
                        # File was already removed or does not exist?
                        pass

        client_ip_address = req.remote_ip
        check_warnings = []
        (ok, problem) = check_recID_is_in_range(self.recid, check_warnings, argd["ln"])
        if ok:
            title, description, keywords = websearch_templates.tmpl_record_page_header_content(
                req, self.recid, argd["ln"]
            )
            navtrail = create_navtrail_links(cc=guess_primary_collection_of_a_record(self.recid))
            if navtrail:
                navtrail += " &gt; "
            navtrail += '<a class="navtrail" href="%s/%s/%s?ln=%s">' % (
                CFG_SITE_URL,
                CFG_SITE_RECORD,
                self.recid,
                argd["ln"],
            )
            navtrail += cgi.escape(title)
            navtrail += "</a>"
            navtrail += '&gt; <a class="navtrail" href="%s/%s/%s/%s/?ln=%s">%s</a>' % (
                CFG_SITE_URL,
                CFG_SITE_RECORD,
                self.recid,
                self.discussion == 1 and "reviews" or "comments",
                argd["ln"],
                self.discussion == 1 and _("Reviews") or _("Comments"),
            )

            if argd["action"] not in actions:
                argd["action"] = "DISPLAY"

            if not argd["msg"]:
                # User had to login in-between, so retrieve msg
                # from cookie
                try:
                    (kind, cookie_argd) = mail_cookie_check_common(argd["cookie"], delete=True)

                    argd.update(cookie_argd)
                except InvenioWebAccessMailCookieDeletedError:
                    return redirect_to_url(
                        req,
                        CFG_SITE_SECURE_URL
                        + "/"
                        + CFG_SITE_RECORD
                        + "/"
                        + str(self.recid)
                        + (self.discussion == 1 and "/reviews" or "/comments"),
                    )
                except InvenioWebAccessMailCookieError:
                    # Invalid or empty cookie: continue
                    pass

            subscribe = False
            if argd["subscribe"] and get_user_subscription_to_discussion(self.recid, uid) == 0:
                # User is not already subscribed, and asked to subscribe
                subscribe = True

            body = perform_request_add_comment_or_remark(
                recID=self.recid,
                ln=argd["ln"],
                uid=uid,
                action=argd["action"],
                msg=argd["msg"],
                note=argd["note"],
                score=argd["score"],
                reviews=self.discussion,
                comID=argd["comid"],
                client_ip_address=client_ip_address,
                editor_type=argd["editor_type"],
                can_attach_files=can_attach_files,
                subscribe=subscribe,
                req=req,
                attached_files=added_files,
                warnings=warning_msgs,
            )

            if self.discussion:
                title = _("Add Review")
            else:
                title = _("Add Comment")

            jqueryheader = """
            <script src="%(CFG_SITE_URL)s/vendors/jquery-multifile/jquery.MultiFile.pack.js" type="text/javascript"></script>
            """ % {
                "CFG_SITE_URL": CFG_SITE_URL
            }

            return page(
                title=title,
                body=body,
                navtrail=navtrail,
                uid=uid,
                language=CFG_SITE_LANG,
                verbose=1,
                req=req,
                navmenuid="search",
                metaheaderadd=jqueryheader,
            )
        # id not in range
        else:
            return page(title=_("Record Not Found"), body=problem, uid=uid, verbose=1, req=req, navmenuid="search")

Example #38

0

Show file

File: file_metadata.py Project: mhellmic/b2share

def read_metadata(inputfile, force=None, remote=False,
                  loginpw=None, verbose=0):
    """
    Returns metadata extracted from given file as dictionary.

    Availability depends on input file format and installed plugins
    (return C{TypeError} if unsupported file format).

    @param inputfile: path to a file
    @type inputfile: string
    @param verbose: verbosity
    @type verbose: int
    @param force: name of plugin to use, to skip plugin auto-discovery
    @type force: string
    @param remote: if the file is accessed remotely or not
    @type remote: boolean
    @param loginpw: credentials to access secure servers (username:password)
    @type loginpw: string
    @return: dictionary of metadata tags as keys, and (interpreted)
             value as value
    @rtype: dict
    @raise TypeError: if file format is not supported.
    @raise RuntimeError: if required library to process file is missing.
    @raise InvenioWebSubmitFileMetadataRuntimeError: when metadata cannot be read.
    """
    metadata = None
    # Check file type (0 base, 1 name, 2 ext)
    ext = decompose_file(inputfile)[2]
    if verbose > 5:
        print(ext.lower(), 'extension to extract from')

    # Loop through the plugins to find a good one for given file
    for plugin_name, plugin in iteritems(metadata_extractor_plugins):
        # Local file
        if 'can_read_local' in plugin and \
            plugin['can_read_local'](inputfile) and not remote and \
            (not force or plugin_name == force):
            if verbose > 5:
                print('Using ' + plugin_name)
            fetched_metadata = plugin['read_metadata_local'](inputfile,
                                                             verbose)
            if not metadata:
                metadata = fetched_metadata
            else:
                metadata.update(fetched_metadata)

        # Remote file
        elif remote and 'can_read_remote' in plugin and \
            plugin['can_read_remote'](inputfile) and \
            (not force or plugin_name == force):
            if verbose > 5:
                print('Using ' + plugin_name)
            fetched_metadata = plugin['read_metadata_remote'](inputfile,
                                                              loginpw,
                                                              verbose)
            if not metadata:
                metadata = fetched_metadata
            else:
                metadata.update(fetched_metadata)

    # Return in case we have something
    if metadata is not None:
        return metadata

    # Case of no plugin found, raise
    raise TypeError, 'Unsupported file type'