Example #1
def test_process_data_list(self):
    # setup
    raw_data = [
        {
            'foo': 'a',
            'bar': {
                'baz': 'b'
            },
            'bof': 'c'
        },
        {
            'foo': 'x',
            'bar': {
                'baz': 'y'
            },
            'bof': None
        },
    ]
    name = str(uuid.uuid4())
    root_path = 'test/%s' % name
    desc = 'flimp unit-test suite'
    about = None
    allowEmpty = True
    process_data_list(raw_data, root_path, name, desc, about, allowEmpty)
    # check we have two objects each with three tags attached
    result = Object.filter('has %s/foo' % root_path)
    self.assertEqual(2, len(result))
    for obj in result:
        self.assertEqual(3, len(obj.tag_paths))
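For context, a minimal sketch of the scaffolding the test above assumes. The import locations and the test-case class name are assumptions for illustration; check the flimp and fom sources for the real layout.

import uuid
import unittest

from fom.mapping import Object              # assumed import location
from flimp.utils import process_data_list   # assumed import location


class ProcessDataListTest(unittest.TestCase):  # hypothetical class name
    # the test_process_data_list method shown above lives here
    pass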
Example #2
def process(filename,
            root_path,
            name,
            desc,
            about,
            preview=False,
            check=False,
            allowEmpty=True):
    """
    The recipe for grabbing the file and pushing it to FluidDB
    """
    # Turn the raw input file into a list data structure containing the items
    # to import into FluidDB
    raw_data = clean_data(filename)
    number_of_records = len(raw_data)
    logger.info('Raw filename: %r' % filename)
    logger.info('Root namespace path: %r' % root_path)
    logger.info('About tag field key: %r' % about)
    logger.info('%d records found' % number_of_records)

    if preview or check:
        if preview:
            # just print out/log a preview
            logger.info('Generating preview...')
            output = list()
            output.append("Preview of processing %r\n" % filename)
            output.append("The following namespaces/tags will be generated.\n")
            output.extend(get_preview(raw_data, root_path))
            output.append("\n%d records will be imported into FluidDB\n" %
                          number_of_records)
            result = "\n".join(output)
            logger.info(result)
            return result
        else:
            # check the file and display the results
            logger.info('Validating %r\n' % filename)
            output = list()
            errors, warnings = validate(raw_data)
            if errors:
                output.append("The following ERRORS were found:\n")
                output.extend(errors)
                output.append('\n')
            if warnings:
                output.append("The following WARNINGS were generated:\n")
                output.extend(warnings)
            if output:
                result = "\n".join(output)
            else:
                result = "Validation passed ok"
            logger.info(result)
            return result
    else:
        process_data_list(raw_data, root_path, name, desc, about, allowEmpty)
        return "Processed %d records" % number_of_records
Example #3
data_list = [
    {
        'foo': 'a',
        'bar': 'b',
        'baz': 'c'
    },
    {
        'foo': 'x',
        'bar': 'y',
        'baz': 'z'
    },
]  # the Python list of dictionaries you want to process
root_path = 'test/foo'  # Namespace where imported namespaces/tags are created
name = 'dataset_name'  # used when creating namespace/tag descriptions
desc = 'Plain English dataset description'  # exactly what it says
about = 'foo'  # field whose value to use for the about tag
preview = False  # True will cause flimp to print out the preview

# Make magic happen...
process_data_list(data_list, root_path, name, desc, about)

# You can also validate the list to check for dictionaries that don't match
# the "template" taken from the first entry in the list.

# missing = missing fields, extras = extra fields not in the template - both
# are lists of instances of these problems.
missing, extras = validate(data_list)
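# A hedged follow-up (illustrative, not part of flimp): one simple way to
# act on the result is to treat a non-uniform list as an error.
if missing or extras:
    raise ValueError('data_list entries do not match the template entry')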

# In the case of cleaning csv data you have several ways to normalise /
# clean the input. For example, a function to clean the column headers:
def clean_header(header):
    """
    A function that takes a column-name header and normalises / cleans it
    into something we'll use as the name of a tag.
    """
    # NOTE: the original snippet was truncated at this point; the body
    # below is a minimal illustrative assumption, not flimp's actual code.
    return header.strip().lower().replace(' ', '_')