def test_convert_fileobj(self, oldhepdata_file, oldhepdata_yaml_path):
        # test fileobj: feed the ``oldhepdata_file`` fixture to the converter
        # and compare the produced directory with the reference YAML directory.
        destination = os.path.join(self.current_tmp, 'yaml')
        conversion_options = {'input_format': 'oldhepdata'}
        hepdata_converter_ws_client.convert(
            self.get_server_url(),
            oldhepdata_file,
            destination,
            options=conversion_options,
        )

        self.assertDirsEqual(oldhepdata_yaml_path, destination)
    def test_convert_no_extract(self, oldhepdata_path, oldhepdata_yaml_path):
        def fetch_archive(target):
            # Ask for the conversion with extract=False; ``target`` is then
            # read back below as a gzip-compressed tar archive.
            hepdata_converter_ws_client.convert(
                self.get_server_url(),
                oldhepdata_path,
                target,
                options={'input_format': 'oldhepdata'},
                extract=False)

        # Case 1: the archive is written into an in-memory stream.
        stream = BytesIO()
        fetch_archive(stream)
        stream.seek(0)
        unpacked_from_stream = os.path.join(self.current_tmp, '1')
        with tarfile.open(mode='r:gz', fileobj=stream) as archive:
            archive.extractall(unpacked_from_stream)
        self.assertDirsEqual(
            os.path.join(unpacked_from_stream, ARCHIVE_NAME),
            oldhepdata_yaml_path)

        # Case 2: the archive is written to a path on disk.
        archive_path = os.path.join(self.current_tmp, 'data.tar.gz')
        fetch_archive(archive_path)

        unpacked_from_path = os.path.join(self.current_tmp, '2')
        with tarfile.open(archive_path, mode='r:gz') as archive:
            archive.extractall(unpacked_from_path)
        self.assertDirsEqual(
            os.path.join(unpacked_from_path, ARCHIVE_NAME),
            oldhepdata_yaml_path)
    def test_convert_fileobj(self, oldhepdata_file, oldhepdata_yaml_path):
        # test fileobj: feed the ``oldhepdata_file`` fixture to the converter
        # and compare the produced directory with the reference YAML directory.
        destination = os.path.join(self.current_tmp, 'yaml')
        conversion_options = {'input_format': 'oldhepdata'}
        hepdata_converter_ws_client.convert(
            self.get_server_url(), oldhepdata_file, destination,
            options=conversion_options)

        self.assertDirsEqual(oldhepdata_yaml_path, destination)
    def test_caching(self, oldhepdata_path, oldhepdata_yaml_path):
        # test paths: run the same conversion twice with the same ``id`` and
        # check that each output matches the reference directory.
        produced = []
        for label in ('1', '2'):
            target = os.path.join(self.current_tmp, 'yaml', label)
            hepdata_converter_ws_client.convert(
                self.get_server_url(),
                oldhepdata_path,
                target,
                options={'input_format': 'oldhepdata'},
                id=1,
            )
            self.assertDirsEqual(oldhepdata_yaml_path, target)
            produced.append(target)

        # Both runs must also agree with each other.
        first_output, second_output = produced
        self.assertDirsEqual(first_output, second_output)
Example #5
0
def convert_zip_archive(input_archive, output_archive, options):
    """
    Convert a zip archive into a targz path with given options.

    The archive is unpacked into a temporary directory, the input file (or
    its directory, for YAML input) is located and handed to ``convert``.
    The temporary directory is always removed, including when no input is
    found or when extraction/conversion raises.

    :param input_archive: zip archive to read the submission from
    :param output_archive: path that ``convert`` writes its output to
    :param options: converter options; ``input_format`` defaults to ``'yaml'``
    :return: path of the resulting file, or ``None`` if no matching input
        file was found in the archive
    """
    input_format = options.get('input_format', 'yaml')

    input_root_dir = tempfile.mkdtemp()
    try:
        with zipfile.ZipFile(input_archive, 'r') as zip_archive:
            zip_archive.extractall(path=input_root_dir)

        # Find the appropriate file/directory in the input archive
        validation = find_file_in_directory(
            input_root_dir,
            lambda x: x == 'submission.yaml' if input_format == 'yaml' else x.endswith('.oldhepdata')
        )
        if not validation:
            return None

        input_directory, input_file = validation

        successful = convert(
            CFG_CONVERTER_URL,
            input_directory if input_format == 'yaml' else input_file,
            output=output_archive,
            options=options,
            extract=False,
        )
    finally:
        # The unpacked copy is only needed while converting; remove it on
        # every exit path so temporary directories do not accumulate.
        rmtree(input_root_dir)

    # Error occurred, the output is a HTML file.  The slice drops the last
    # seven characters, i.e. it presumes a '.tar.gz' suffix.
    if not successful:
        output_file = output_archive[:-7] + '.html'
    else:
        output_file = output_archive
    move(output_archive, output_file)

    return output_file
Example #6
0
def convert_zip_archive(input_archive, output_archive, options):
    """
    Convert a zip archive into a targz path with given options.

    :param input_archive: archive handed to ``prepare_data_folder``
    :param output_archive: path that ``convert`` writes its output to
    :param options: converter options; ``input_format`` defaults to ``'yaml'``
    :return: output_file, or ``None`` when ``prepare_data_folder`` yields
        nothing usable
    """
    input_format = options.get('input_format', 'yaml')
    with prepare_data_folder(input_archive, input_format) as located:
        if not located:
            return None

        input_directory, input_file = located

        # YAML input is converted from its directory, other formats from
        # the single input file.
        if input_format == 'yaml':
            source = input_directory
        else:
            source = input_file

        successful = convert(
            CFG_CONVERTER_URL,
            source,
            output=output_archive,
            options=options,
            extract=False,
            timeout=CFG_CONVERTER_TIMEOUT,
        )

    # Error occurred, the output is a HTML file
    output_file = output_archive if successful else output_archive[:-7] + '.html'
    move(output_archive, output_file)

    return output_file
Example #7
0
def convert_zip_archive(input_archive, output_archive, options):
    """
    Convert a zip archive into a targz path with given options.

    The archive is unpacked into a temporary directory, the input file (or
    its directory, for YAML input) is located and handed to ``convert``.
    The temporary directory is always removed, including when no input is
    found or when extraction/conversion raises.

    :param input_archive: zip archive to read the submission from
    :param output_archive: path that ``convert`` writes its output to
    :param options: converter options; ``input_format`` defaults to ``'yaml'``
    :return: path of the resulting file, or ``None`` if no matching input
        file was found in the archive
    """
    input_format = options.get('input_format', 'yaml')

    input_root_dir = tempfile.mkdtemp()
    try:
        with zipfile.ZipFile(input_archive, 'r') as zip_archive:
            zip_archive.extractall(path=input_root_dir)

        # Find the appropriate file/directory in the input archive
        validation = find_file_in_directory(
            input_root_dir,
            lambda x: x == 'submission.yaml' if input_format == 'yaml' else x.endswith('.oldhepdata')
        )
        if not validation:
            return None

        input_directory, input_file = validation

        successful = convert(
            CFG_CONVERTER_URL,
            input_directory if input_format == 'yaml' else input_file,
            output=output_archive,
            options=options,
            extract=False,
        )
    finally:
        # The unpacked copy is only needed while converting; remove it on
        # every exit path so temporary directories do not accumulate.
        rmtree(input_root_dir)

    # Error occurred, the output is a HTML file.  The slice drops the last
    # seven characters, i.e. it presumes a '.tar.gz' suffix.
    if not successful:
        output_file = output_archive[:-7] + '.html'
    else:
        output_file = output_archive
    move(output_archive, output_file)

    return output_file
    def test_convert_404(self, oldhepdata_file):
        # A request to a URL the server does not serve must raise the
        # client's Error, chained from the underlying HTTPError.
        broken_url = self.get_server_url() + '/notavalidurl'

        with self.assertRaises(hepdata_converter_ws_client.Error) as cm:
            hepdata_converter_ws_client.convert(
                broken_url,
                oldhepdata_file,
                self.current_tmp,
                options={'input_format': 'oldhepdata'},
                timeout=5)

        self.assertEqual('Request to %s failed' % broken_url,
                         str(cm.exception))
        # assertIsInstance reports the actual type on failure, unlike
        # assertTrue(isinstance(...)) which only says "False is not true".
        self.assertIsInstance(cm.exception.__cause__,
                              requests.exceptions.HTTPError)
        self.assertTrue(
            str(cm.exception.__cause__).startswith(
                "404 Client Error: NOT FOUND for url: %s" % broken_url))
    def test_return_value(self, oldhepdata_file, oldhepdata_yaml_path):
        # test fileobj: with no output argument the converter's return value
        # is read back here as a gzip-compressed tar archive.
        # Local import keeps this block self-contained.
        from io import BytesIO

        path = os.path.join(self.current_tmp, 'yaml')
        r = hepdata_converter_ws_client.convert(self.get_server_url(), oldhepdata_file,
                                                options={'input_format': 'oldhepdata'})

        # The archive is binary data, so wrap it in a bytes buffer;
        # ``StringIO.StringIO`` only exists on Python 2.
        with tarfile.open(mode='r:gz', fileobj=BytesIO(r)) as tar:
            tar.extractall(path)

        self.assertDirsEqual(oldhepdata_yaml_path, os.path.join(path, ARCHIVE_NAME))
    def test_convert_timeout(self, oldhepdata_file):
        # A connection attempt that times out must raise the client's Error,
        # chained from the underlying ConnectTimeout.
        broken_url = 'https://example.com:81'

        with self.assertRaises(hepdata_converter_ws_client.Error) as cm:
            hepdata_converter_ws_client.convert(
                broken_url,
                oldhepdata_file,
                self.current_tmp,
                options={'input_format': 'oldhepdata'},
                timeout=5)

        self.assertEqual('Request to %s failed' % broken_url,
                         str(cm.exception))
        # assertIsInstance reports the actual type on failure, unlike
        # assertTrue(isinstance(...)) which only says "False is not true".
        self.assertIsInstance(cm.exception.__cause__,
                              requests.exceptions.ConnectTimeout)
        self.assertTrue(
            str(cm.exception.__cause__).startswith(
                "HTTPSConnectionPool(host='example.com', port=81): Max retries exceeded with url"
            ))
    def test_convert_no_extract(self, oldhepdata_path, oldhepdata_yaml_path):
        # With extract=False the converter output is read back below as a
        # gzip-compressed tar archive, first from an in-memory buffer and
        # then from a path on disk.
        # Local import keeps this block self-contained.
        from io import BytesIO

        # The archive is binary data, so collect it in a bytes buffer;
        # ``StringIO.StringIO`` only exists on Python 2.
        output = BytesIO()
        hepdata_converter_ws_client.convert(self.get_server_url(), oldhepdata_path, output,
                                            options={'input_format': 'oldhepdata'}, extract=False)
        output.seek(0)
        tmp_path = os.path.join(self.current_tmp, '1')
        with tarfile.open(mode='r:gz', fileobj=output) as tar:
            tar.extractall(tmp_path)
        self.assertDirsEqual(os.path.join(tmp_path, ARCHIVE_NAME),
                             oldhepdata_yaml_path)

        path = os.path.join(self.current_tmp, 'data.tar.gz')
        hepdata_converter_ws_client.convert(self.get_server_url(), oldhepdata_path, path,
                                            options={'input_format': 'oldhepdata'}, extract=False)

        tmp_path = os.path.join(self.current_tmp, '2')

        with tarfile.open(path, mode='r:gz') as tar:
            tar.extractall(tmp_path)
        self.assertDirsEqual(os.path.join(tmp_path, ARCHIVE_NAME),
                             oldhepdata_yaml_path)
    def test_return_value(self, oldhepdata_file, oldhepdata_yaml_path):
        # test fileobj: with no output argument the converter's return value
        # is read back here as a gzip-compressed tar archive.
        extract_root = os.path.join(self.current_tmp, 'yaml')
        archive_bytes = hepdata_converter_ws_client.convert(
            self.get_server_url(), oldhepdata_file,
            options={'input_format': 'oldhepdata'})

        archive_stream = BytesIO(archive_bytes)
        with tarfile.open(mode='r:gz', fileobj=archive_stream) as archive:
            archive.extractall(extract_root)

        converted_dir = os.path.join(extract_root, ARCHIVE_NAME)
        self.assertDirsEqual(oldhepdata_yaml_path, converted_dir)
    def test_caching(self, oldhepdata_path, oldhepdata_yaml_path):
        # test paths: run the same conversion twice with the same ``id`` and
        # check that each output matches the reference directory.
        produced = []
        for label in ('1', '2'):
            target = os.path.join(self.current_tmp, 'yaml', label)
            hepdata_converter_ws_client.convert(
                self.get_server_url(), oldhepdata_path, target,
                options={'input_format': 'oldhepdata'}, id=1)
            self.assertDirsEqual(oldhepdata_yaml_path, target)
            produced.append(target)

        # Both runs must also agree with each other.
        first_output, second_output = produced
        self.assertDirsEqual(first_output, second_output)
Example #14
0
def convert_oldhepdata_to_yaml(input_path, output_path):
    """ Run the converter on ``input_path`` (oldhepdata) and write YAML to ``output_path``.

    Returns whatever ``convert`` returns, which callers treat as a success flag.
    """
    conversion_options = dict(input_format='oldhepdata', output_format='yaml')
    return convert(CFG_CONVERTER_URL, input_path,
                   output=output_path, options=conversion_options)
Example #15
0
def convert_oldhepdata_to_yaml(input_path, output_path):
    """ Run the converter on ``input_path`` (oldhepdata) and write YAML to ``output_path``.

    Returns whatever ``convert`` returns, which callers treat as a success flag.
    """
    conversion_options = dict(input_format='oldhepdata', output_format='yaml')
    return convert(CFG_CONVERTER_URL, input_path,
                   output=output_path, options=conversion_options)
Example #16
0
def download_datatable(data_resource, file_format, *args, **kwargs):
    """
    Send a single data table to the client in the requested format.

    ``yaml`` is served directly from ``data_resource.file_location``; any
    other format is obtained by calling ``convert`` on the directory that
    contains the table file.

    :param data_resource: object whose ``file_location`` points at the table file
    :param file_format: requested output format
    :param args: unused
    :param kwargs: ``submission_id`` is required, ``table_name`` is optional;
        both are used to build the download file name
    :return: the ``send_file`` response
    """
    record_path, table_name = os.path.split(data_resource.file_location)

    name_parts = ['HEPData-{0}'.format(kwargs.pop('submission_id'))]
    if 'table_name' in kwargs:
        name_parts.append(kwargs.pop('table_name').replace(' ', ''))
    filename = '-'.join(name_parts)

    output_path = os.path.join(current_app.config['CFG_TMPDIR'], filename)
    archive_path = output_path + '-dir'

    # YAML is the stored format, so no conversion is needed.
    if file_format == 'yaml':
        return send_file(data_resource.file_location, as_attachment=True)

    table_stem = table_name.split('.')[0]
    options = {
        'input_format': 'yaml',
        'output_format': file_format,
        'table': table_name,
        'filename': table_stem,
    }

    # NOTE(review): this checks ``output_path`` while the converter writes to
    # ``output_path + '-dir'`` — confirm this is the intended cache key.
    if os.path.exists(output_path):
        successful = True
    else:
        successful = convert(
            CFG_CONVERTER_URL,
            record_path,
            output=archive_path,
            options=options,
            extract=False,
        )

    if not successful:
        # Error occurred, the output is a HTML file
        return send_file(archive_path, as_attachment=True,
                         attachment_filename=filename + '.html')

    extracted_path = extract(filename + ".tar.gz", archive_path,
                             output_path + "." + file_format)
    file_to_send = get_file_in_directory(extracted_path, file_format)
    return send_file(file_to_send, as_attachment=True,
                     attachment_filename=filename + '.' + file_format)
Example #17
0
def convert_oldhepdata_to_yaml(input_path, output_path):
    """
    Run the converter on oldhepdata input and write the new YAML format.

    :param input_path: passed to ``convert`` as the input
    :param output_path: passed to ``convert`` as ``output``
    :return: whether conversion was successful (the value returned by ``convert``)
    """
    conversion_options = dict(input_format='oldhepdata', output_format='yaml')
    return convert(CFG_CONVERTER_URL, input_path,
                   output=output_path, options=conversion_options)
Example #18
0
def download_datatable(datasubmission, file_format, *args, **kwargs):
    """
    Download a particular data table given a ``datasubmission``.

    ``json`` requests are redirected to the record data URL, ``yaml``
    requests are served straight from the stored resource file, and any
    other supported format is produced by calling ``convert`` on the
    directory containing the table file.

    :param datasubmission: submission whose ``data_file`` resource is served
    :param file_format: requested output format
    :param args: unused
    :param kwargs: ``submission_id`` is required once conversion is reached;
        ``table_name`` and ``rivet_analysis_name`` are optional
    :return: display_error or send_file depending on success of conversion
    """

    if file_format == 'json':
        return redirect('/record/data/{0}/{1}/{2}'.format(
            datasubmission.publication_recid, datasubmission.id,
            datasubmission.version))
    elif file_format not in CFG_SUPPORTED_FORMATS:
        return display_error(
            title="The " + file_format + " output format is not supported",
            description="This output format is not supported. " +
            "Currently supported formats: " + str(CFG_SUPPORTED_FORMATS),
        )

    dataresource = DataResource.query.filter_by(
        id=datasubmission.data_file).one()

    record_path, table_name = os.path.split(dataresource.file_location)

    filename = 'HEPData-{0}-v{1}'.format(kwargs.pop('submission_id'),
                                         datasubmission.version)
    if 'table_name' in kwargs:
        # Replace or strip characters that are awkward in a file name.
        filename += '-' + kwargs.pop('table_name').replace(' ', '_').replace(
            '/', '_').replace('$', '').replace('\\', '')

    output_path = os.path.join(current_app.config['CFG_TMPDIR'], filename)

    if file_format == 'yaml':
        return send_file(dataresource.file_location,
                         as_attachment=True,
                         attachment_filename=filename + '.yaml')

    options = {
        'input_format': 'yaml',
        'output_format': file_format,
        'table': table_name,
        'filename': table_name.split('.')[0],
        'validator_schema_version': '0.1.0',
    }

    # NOTE(review): ``.first()`` may presumably return None, in which case the
    # ``overall_status`` access below would fail — confirm a match always exists.
    hepsubmission = HEPSubmission.query.filter_by(
        publication_recid=datasubmission.publication_recid,
        version=datasubmission.version).first()

    if datasubmission.doi and hepsubmission.overall_status != 'sandbox':
        # Pass the DOI with its last '/'-separated component removed.
        options['hepdata_doi'] = datasubmission.doi.rsplit(
            '/', 1)[0].encode('ascii')

    if file_format == 'yoda':
        rivet_analysis_name = kwargs.pop('rivet_analysis_name', '')
        if rivet_analysis_name:
            options['rivet_analysis_name'] = rivet_analysis_name
        elif datasubmission.publication_inspire_id:
            record = get_record_contents(datasubmission.publication_recid)
            if record:
                # Check if this record has a Rivet analysis, then extract the Rivet analysis name from the URL.
                if 'analyses' in record:
                    for analysis in record['analyses']:
                        if analysis['type'] == 'rivet':
                            options['rivet_analysis_name'] = analysis[
                                'analysis'].split('/')[-1]
                # Otherwise guess the Rivet analysis name using the collaboration name,
                # the creation year of the INSPIRE record, and the INSPIRE ID.
                if 'rivet_analysis_name' not in options:
                    try:
                        year = parse(record['creation_date']).year
                    except Exception:
                        # Missing or unparsable creation date: fall back to
                        # the publication year.  ``Exception`` (not a bare
                        # except) lets KeyboardInterrupt/SystemExit propagate.
                        year = record['year']  # publication year
                    options['rivet_analysis_name'] = '{0}_{1}_I{2}'.format(
                        ''.join(record['collaborations']).upper(), year,
                        datasubmission.publication_inspire_id)

    successful = convert(
        CFG_CONVERTER_URL,
        record_path,
        output=output_path + '-dir',
        options=options,
        extract=False,
    )

    if successful:
        new_path = output_path + "." + file_format
        new_path = extract(output_path + '-dir', new_path)
        # The downloaded archive is no longer needed once extracted.
        os.remove(output_path + '-dir')
        file_to_send = get_file_in_directory(new_path, file_format)
    else:
        # Error occurred, the output is a HTML file
        file_to_send = output_path + '-dir'
        file_format = 'html'

    return send_file(file_to_send,
                     as_attachment=True,
                     attachment_filename=filename + '.' + file_format)
Example #19
0
def download_datatable(datasubmission, file_format, *args, **kwargs):
    """
    Download a particular data table given a ``datasubmission``.

    ``json`` is redirected to the record data URL, ``yaml``/``original`` are
    served from the stored resource file, and other supported formats are
    produced by ``convert``.

    :param datasubmission: submission whose ``data_file`` resource is served
    :param file_format: requested output format
    :param args: unused
    :param kwargs: ``submission_id`` is required once conversion is reached;
        ``table_name`` and ``rivet_analysis_name`` are optional
    :return: display_error or send_file depending on success of conversion
    """

    if file_format == 'json':
        data_url = '/record/data/{0}/{1}/{2}'.format(
            datasubmission.publication_recid,
            datasubmission.id,
            datasubmission.version)
        return redirect(data_url)

    if file_format not in CFG_SUPPORTED_FORMATS:
        return display_error(
            title="The " + file_format + " output format is not supported",
            description="This output format is not supported. " +
                        "Currently supported formats: " + str(CFG_SUPPORTED_FORMATS),
        )

    dataresource = DataResource.query.filter_by(id=datasubmission.data_file).one()
    record_path, table_name = os.path.split(dataresource.file_location)

    filename = 'HEPData-{0}-v{1}'.format(kwargs.pop('submission_id'), datasubmission.version)
    if 'table_name' in kwargs:
        # Replace or strip characters that are awkward in a file name.
        table_label = kwargs.pop('table_name')
        for old, new in ((' ', '_'), ('/', '_'), ('$', ''), ('\\', '')):
            table_label = table_label.replace(old, new)
        filename += '-' + table_label

    output_path = os.path.join(current_app.config['CFG_TMPDIR'], filename)
    archive_path = output_path + '-dir'

    # The stored file is already YAML, so these formats need no conversion.
    if file_format in ('yaml', 'original'):
        return send_file(
            dataresource.file_location,
            as_attachment=True,
            attachment_filename=filename + '.yaml'
        )

    table_stem = table_name.split('.')[0]
    options = {
        'input_format': 'yaml',
        'output_format': file_format,
        'table': table_name,
        'filename': table_stem,
        'validator_schema_version': '0.1.0',
    }

    hepsubmission = HEPSubmission.query.filter_by(
        publication_recid=datasubmission.publication_recid,
        version=datasubmission.version).first()

    if datasubmission.doi and not hepsubmission.overall_status.startswith('sandbox'):
        # Pass the DOI with its last '/'-separated component removed.
        options['hepdata_doi'] = datasubmission.doi.rsplit('/', 1)[0]

    if file_format == 'yoda':
        # Prefer an explicitly supplied name, otherwise fall back to a guess.
        rivet_analysis_name = (kwargs.pop('rivet_analysis_name', '')
                               or guess_rivet_analysis_name(hepsubmission))
        if rivet_analysis_name:
            options['rivet_analysis_name'] = rivet_analysis_name

    try:
        successful = convert(
            CFG_CONVERTER_URL,
            record_path,
            output=archive_path,
            options=options,
            extract=False,
            timeout=CFG_CONVERTER_TIMEOUT,
        )
    except Error as error:  # hepdata_converter_ws_client.Error
        # NOTE(review): the address in this title looks like a mangled
        # placeholder — confirm the intended contact address.
        return display_error(title='Report concerns to [email protected]', description=str(error))

    if not successful:
        # Error occurred, the output is a HTML file
        return send_file(archive_path, as_attachment=True,
                         attachment_filename=filename + '.html')

    extracted_path = extract(archive_path, output_path + "." + file_format)
    # The downloaded archive is no longer needed once extracted.
    os.remove(archive_path)
    file_to_send = get_file_in_directory(extracted_path, file_format)
    return send_file(file_to_send, as_attachment=True,
                     attachment_filename=filename + '.' + file_format)
Example #20
0
def download_datatable(datasubmission, file_format, *args, **kwargs):
    """
    Download a particular data table given a ``datasubmission``.

    ``json`` is redirected to the record data URL, ``yaml`` is served from
    the stored resource file, and other supported formats are produced by
    ``convert``.

    :param datasubmission: submission whose ``data_file`` resource is served
    :param file_format: requested output format
    :param args: unused
    :param kwargs: ``submission_id`` is required once conversion is reached;
        ``table_name`` is optional
    :return: display_error or send_file depending on success of conversion
    """

    if file_format == 'json':
        data_url = '/record/data/{0}/{1}/{2}'.format(
            datasubmission.publication_recid,
            datasubmission.id,
            datasubmission.version)
        return redirect(data_url)

    if file_format not in CFG_SUPPORTED_FORMATS:
        return display_error(
            title="The " + file_format + " output format is not supported",
            description="This output format is not supported. " +
            "Currently supported formats: " + str(CFG_SUPPORTED_FORMATS),
        )

    dataresource = DataResource.query.filter_by(
        id=datasubmission.data_file).one()
    record_path, table_name = os.path.split(dataresource.file_location)

    name_parts = ['HEPData-{0}-v{1}'.format(kwargs.pop('submission_id'),
                                            datasubmission.version)]
    if 'table_name' in kwargs:
        name_parts.append(kwargs.pop('table_name').replace(' ', ''))
    filename = '-'.join(name_parts)

    output_path = os.path.join(current_app.config['CFG_TMPDIR'], filename)
    archive_path = output_path + '-dir'

    # The stored file is already YAML, so no conversion is needed.
    if file_format == 'yaml':
        return send_file(dataresource.file_location,
                         as_attachment=True,
                         attachment_filename=filename + '.yaml')

    table_stem = table_name.split('.')[0]
    options = {
        'input_format': 'yaml',
        'output_format': file_format,
        'table': table_name,
        'filename': table_stem,
    }

    if datasubmission.doi:
        # Pass the DOI with its last '/'-separated component removed.
        doi_prefix = datasubmission.doi.rsplit('/', 1)[0]
        options['hepdata_doi'] = doi_prefix.encode('ascii')

    if datasubmission.publication_inspire_id and file_format == 'yoda':
        record = get_record_contents(datasubmission.publication_recid)
        if record:
            # Build the Rivet analysis name from the collaboration name(s),
            # the record year and the INSPIRE ID.
            collaboration = ''.join(record['collaborations']).upper()
            options['rivet_analysis_name'] = '{0}_{1}_I{2}'.format(
                collaboration, record['year'],
                datasubmission.publication_inspire_id)

    # NOTE(review): this checks ``output_path`` while the converter writes to
    # ``output_path + '-dir'`` — confirm this is the intended cache key.
    if os.path.exists(output_path):
        successful = True
    else:
        successful = convert(
            CFG_CONVERTER_URL,
            record_path,
            output=archive_path,
            options=options,
            extract=False,
        )

    if not successful:
        # Error occurred, the output is a HTML file
        return send_file(archive_path,
                         as_attachment=True,
                         attachment_filename=filename + '.html')

    extracted_path = extract(filename + ".tar.gz", archive_path,
                             output_path + "." + file_format)
    file_to_send = get_file_in_directory(extracted_path, file_format)
    return send_file(file_to_send,
                     as_attachment=True,
                     attachment_filename=filename + '.' + file_format)