Ejemplo n.º 1
0
    def load(self, source):
        """Load the resources in ``source`` and relate them to each other.

        The reader is chosen from the extension of ``source``:

        * ``.arches`` -- ``ArchesReader``; the file is validated first.
        * ``.json``   -- ``JsonReader``; the ``archesjson`` flag passed to the
          downstream calls is set to ``True``.

        The parsed resources are handed to ``self.resource_list_to_entities``.
        If a sibling file with the same base name and a ``.relations``
        extension exists, it is read as pipe-delimited CSV and every row is
        passed to ``self.relate_resources`` together with the
        ``'legacyid_to_entityid'`` entry of the conversion results.

        Args:
            source: Path of the resource file to load.

        Raises:
            ValueError: If the extension of ``source`` is not supported.
        """
        file_name, file_format = os.path.splitext(source)
        archesjson = False
        if file_format == '.arches':
            reader = ArchesReader()
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print('\nVALIDATING ARCHES FILE ({0})'.format(source))
            reader.validate_file(source)
        elif file_format == '.json':
            archesjson = True
            reader = JsonReader()
        else:
            # Fail early with a clear message rather than hitting an
            # UnboundLocalError on ``reader`` further down.
            raise ValueError(
                'Unsupported resource file format: {0!r}'.format(file_format))

        start = time()
        resources = reader.load_file(source)

        print('\nLOADING RESOURCES ({0})'.format(source))
        elapsed = (time() - start)
        print('time to parse {0} resources = {1}'.format(file_name, elapsed))
        results = self.resource_list_to_entities(resources, archesjson)

        # NOTE(review): the records are collected but not used in the visible
        # part of this method -- confirm whether a caller needs them.
        related_resource_records = []
        relationships_file = file_name + '.relations'
        if os.path.exists(relationships_file):
            # ``with`` guarantees the relations file is closed again.
            with open(relationships_file, 'r') as relations_handle:
                for relationship in csv.DictReader(relations_handle,
                                                   delimiter='|'):
                    related_resource_records.append(
                        self.relate_resources(
                            relationship,
                            results['legacyid_to_entityid'],
                            archesjson))
        else:
            print('No relationship file')
Ejemplo n.º 2
0
    def load(self, source, appending=False):
        """Load the resources in ``source`` and relate them to each other.

        The handling depends on the extension of ``source``:

        * ``.shp``    -- read with ``ShapeReader``.
        * ``.arches`` -- read with ``ArchesReader`` (no validation is run).
        * ``.json``   -- validated and read with ``JsonReader``.
        * ``.jsonl``  -- every non-blank line is decoded as one JSON resource
          and passed on its own to ``self.resource_list_to_entities`` with a
          shared ``load_id``; the method then returns early.

        For every format except ``.jsonl`` the parsed resources are handed to
        ``self.resource_list_to_entities`` in one call. If a sibling file with
        the same base name and a ``.relations`` extension exists, each of its
        rows is passed to ``self.relate_resources``. The delimiter of that
        file is ``|`` only when its first line contains a pipe and no comma;
        otherwise it is ``,``.

        Args:
            source: Path of the resource file to load.
            appending: Forwarded as the third positional argument of
                ``self.resource_list_to_entities`` (not used for ``.jsonl``,
                where ``False`` is always passed).

        Returns:
            ``{"count": n}`` for ``.jsonl`` input, where ``n`` is the number
            of lines loaded; otherwise the value returned by
            ``self.resource_list_to_entities``.

        Raises:
            ValueError: If the extension of ``source`` is not supported.
        """
        file_name, file_format = os.path.splitext(source)
        source_name = os.path.basename(source)
        archesjson = False
        if file_format == '.shp':
            reader = ShapeReader()
        elif file_format == '.arches':
            reader = ArchesReader()
            # Single-argument print(...) behaves the same on Python 2 and 3.
            # NOTE(review): the banner is printed although the validation
            # call below is disabled -- confirm this is intended.
            print('\nVALIDATING ARCHES FILE ({0})'.format(source))
            # reader.validate_file(source)
        elif file_format == '.json':
            archesjson = True
            reader = JsonReader()
            print('\nVALIDATING JSON FILE ({0})'.format(source))
            reader.validate_file(source)
        elif file_format == '.jsonl':
            print('\nNO VALIDATION USED ON JSONL FILE ({0})'.format(source))
            d = datetime.datetime.now()
            # NOTE(review): the id skips ``d.second`` (minute is followed by
            # microsecond); kept as-is because the id format is observable.
            load_id = 'LOADID:{0}-{1}-{2}-{3}-{4}-{5}'.format(
                d.year, d.month, d.day, d.hour, d.minute, d.microsecond)
            loaded_ct = 0
            with open(source, "rb") as openf:
                # Stream the file instead of holding every line in memory.
                for line in openf:
                    if not line.strip():
                        # Blank lines (e.g. a trailing newline) carry no
                        # resource and would make json.loads fail.
                        continue
                    self.resource_list_to_entities(
                        [json.loads(line)],
                        True,
                        False,
                        filename=source_name,
                        load_id=load_id)
                    loaded_ct += 1
            return {"count": loaded_ct}
        else:
            # Fail early with a clear message rather than hitting an
            # UnboundLocalError on ``reader`` further down.
            raise ValueError(
                'Unsupported resource file format: {0!r}'.format(file_format))

        start = time()
        resources = reader.load_file(source)

        print('\nLOADING RESOURCES ({0})'.format(source))
        relationships_file = file_name + '.relations'
        elapsed = (time() - start)
        print('time to parse {0} resources = {1}'.format(file_name, elapsed))
        results = self.resource_list_to_entities(
            resources,
            archesjson,
            appending,
            filename=source_name)
        if os.path.exists(relationships_file):
            # One handle serves both the delimiter sniffing and the CSV
            # parsing, and ``with`` guarantees it is closed afterwards.
            with open(relationships_file, 'r') as relations_handle:
                # readline() returns '' for an empty file, so no IndexError.
                first_line = relations_handle.readline()
                if "|" in first_line and "," not in first_line:
                    delim = "|"
                else:
                    delim = ","
                # Rewind so DictReader sees the header row again.
                relations_handle.seek(0)
                for relationship in csv.DictReader(relations_handle,
                                                   delimiter=delim):
                    self.relate_resources(relationship,
                                          results['legacyid_to_entityid'],
                                          archesjson)
        else:
            print('No relationship file')

        return results