Python StandardFileReader Examples

Programming Language: Python

Namespace/Package Name: adsdata.reader

Method/Function: StandardFileReader

Examples at hotexamples.com: 3

Python StandardFileReader - 3 examples found. These are the top-rated real-world Python examples of adsdata.reader.StandardFileReader extracted from open source projects. You can rate examples to help us improve their quality.

Example #1

Show file

File: run.py Project: adsabs/AdsDataSqlSync

def load_column_files(config, nonbib_db_engine, nonbib_db_conn, sql_sync):
    """Bulk-load the nonbib column files into Postgres, then join them.

    Uses psycopg's ``copy_from`` to copy the data of each column file into
    the table ``<sql_sync.schema>.<type>``, committing after each table.
    After all data has been loaded, ``sql_sync.create_joined_rows`` is
    called to create a unified row view.

    Args:
        config: mapping that provides ``DATA_PATH`` and, for every type other
            than ``datalinks``, an entry keyed by the upper-cased type name
            that is appended to ``DATA_PATH`` to form the file name.
        nonbib_db_engine: object providing ``raw_connection()``.
        nonbib_db_conn: connection handed to ``sql_sync.create_joined_rows``.
        sql_sync: object providing ``schema`` and ``create_joined_rows``.
    """
    raw_conn = nonbib_db_engine.raw_connection()
    try:
        cur = raw_conn.cursor()
        try:
            for t in nonbib.NonBib.all_types:
                table_name = sql_sync.schema + '.' + t
                logger.info('processing {}'.format(table_name))
                # here we have to read multiple files, information in config
                # file are in a list
                if t == 'datalinks':
                    datalinks.load_column_files_datalinks_table(
                        config, table_name, t, raw_conn, cur)
                    continue

                filename = config['DATA_PATH'] + config[t.upper()]
                if t == 'canonical':
                    r = reader.BibcodeFileReader(filename)
                elif t in ('refereed', 'pub_openaccess', 'private',
                           'ocrabstract', 'nonarticle'):
                    r = reader.OnlyTrueFileReader(filename)
                else:
                    r = reader.StandardFileReader(t, filename)
                if r:
                    cur.copy_from(r, table_name)
                    raw_conn.commit()
        finally:
            # release the cursor even if a copy or commit raised
            cur.close()
    finally:
        # release the raw connection even if loading failed part way through
        raw_conn.close()
    # only reached when every table loaded without error
    sql_sync.create_joined_rows(nonbib_db_conn)

Example #2

Show file

File: test_rowview_ingest.py Project: adsabs/AdsDataSqlSync

    def standard_reader_test(self, file_type, spot_checks, data_dir='data1/'):
        """Verify the standard reader creates the correct sql value.

        Reads the configured test file for ``file_type`` through
        ``reader.StandardFileReader`` and checks every line it returns.

        Args:
            file_type: name of the column file type; its upper-cased form is
                the key used to look up the file name in ``self.config``.
            spot_checks: list of (bibcode, value) pairs to verify.  It might
                include the first and last bibcodes in the file and other
                interesting or edge cases.
            data_dir: directory, relative to ``TEST_DATA_PATH``, holding the
                test file.
        """
        filename = (self.config['TEST_DATA_PATH'] + data_dir +
                    self.config[file_type.upper()])
        # close the counting handle deterministically instead of leaking it
        with open(filename) as f:
            lines_in_file = sum(1 for _ in f)

        multi_line = ('simbad', 'grants', 'citation', 'reference', 'reader')
        # trailing comma is required: ('relevance') is just a string and
        # would turn the membership test below into a substring match
        multi_value = ('relevance',)
        is_multi_value = file_type in multi_value

        bibcode_count = 0
        spot_checks_found = []
        r = reader.StandardFileReader(file_type, filename)
        try:
            line = r.read()
            while line:
                bibcode_count += 1
                parts = line.split('\t')
                bibcode = parts[0].strip()
                if is_multi_value:
                    # NOTE(review): presumably skips a 19-character bibcode
                    # plus the tab separator -- confirm against the reader
                    value = line[20:]
                else:
                    # check the field count first so a malformed line fails
                    # with this message rather than an IndexError
                    self.assertEqual(
                        2, len(parts),
                        '{} lines should only include bibcode and value array {}'.
                        format(file_type, line))
                    value = parts[1].strip()
                    self.assertEqual('{', value[0],
                                     'invalid sql array {}'.format(value))
                    self.assertEqual('}', value[-1],
                                     'invalid sql array {}'.format(value))
                # we spot check a couple fields
                for spot_check in spot_checks:
                    spot_bibcode = spot_check[0]
                    if bibcode == spot_bibcode:
                        spot_checks_found.append(spot_bibcode)
                        spot_value = spot_check[1]
                        self.assertEqual(
                            spot_value, value,
                            'bad {} value for bibcode {}, expected {}, received {}'
                            .format(file_type, bibcode, spot_value, value))
                line = r.read()
        finally:
            # close the reader even when an assertion above fails
            r.close()

        self.assertEqual(
            len(spot_checks), len(spot_checks_found),
            'for {} did not find all spot checks'.format(file_type))
        # the line count is only compared for types not listed in multi_line
        if file_type not in multi_line:
            self.assertEqual(
                lines_in_file, bibcode_count,
                '{} standard reader returned wrong number of lines'.format(
                    file_type))

Example #3

Show file

File: test_rowview_ingest.py Project: adsabs/AdsDataSqlSync

    def test_bad_bibcode(self):
        """A bad bibcode in the input file should be skipped and the rest processed.

        Uses a downloads file from ``dataInvalid/`` that contains one bad
        bibcode, so the reader is expected to return one line fewer than the
        file holds.  The bad bibcode should also be logged, but logging is
        not verified here.
        """
        file_type = 'download'
        filename = (self.config['TEST_DATA_PATH'] + 'dataInvalid/' +
                    self.config[file_type.upper()])
        # close the counting handle deterministically instead of leaking it
        with open(filename) as f:
            lines_in_file = sum(1 for _ in f)

        r = reader.StandardFileReader(file_type, filename)
        bibcode_count = 0
        try:
            line = r.read()
            while line:
                bibcode_count += 1
                line = r.read()
        finally:
            # release the reader's underlying file whether or not reading failed
            r.close()
        self.assertEqual(bibcode_count, lines_in_file - 1,
                         'bad bibcode in file not skipped')