Esempio n. 1
0
def import_one_resource(line):
    """Import the single resource serialized in ``line``.

    This lives at module level, outside of the BusinessDataImporter class,
    so that it can be called with multiprocessing.

    ``line`` is deserialized with ``JSONDeserializer`` and handed to a fresh
    ``ArchesFileReader`` wrapped as ``{"resources": [<resource>]}``.
    """

    # Drop every open database connection before doing any work.
    # NOTE(review): presumably so a worker process does not reuse connections
    # inherited from its parent -- confirm.
    connections.close_all()

    file_reader = ArchesFileReader()
    payload = {"resources": [JSONDeserializer().deserialize(line)]}
    file_reader.import_business_data(payload)
Esempio n. 2
0
    def import_business_data(self,
                             file_format=None,
                             business_data=None,
                             mapping=None,
                             overwrite='append',
                             bulk=False,
                             create_concepts=False,
                             create_collections=False):
        """Import business data with the reader that matches ``file_format``.

        ``file_format``, ``business_data`` and ``mapping`` fall back to the
        attributes of the same name on this instance when passed as ``None``.

        Args:
            file_format: ``'json'`` selects ``ArchesFileReader``; ``'csv'``,
                ``'shp'`` and ``'zip'`` select ``CsvReader``.
            business_data: Data handed to the selected reader.
            mapping: Mapping handed to the selected reader. The ``CsvReader``
                formats cannot run without one.
            overwrite: Forwarded unchanged to ``CsvReader``.
            bulk: Forwarded unchanged to ``CsvReader``.
            create_concepts: Forwarded unchanged to ``CsvReader``.
            create_collections: Forwarded unchanged to ``CsvReader``.

        Raises:
            ValueError: If ``file_format`` is not one of the formats above.
                (Previously this surfaced as an ``AttributeError`` on ``None``
                when the errors were reported.)
            SystemExit: If a ``CsvReader`` format is requested without a
                mapping.
        """
        reader = None
        start = time()
        # NOTE(review): this cursor is never used below; the call is kept in
        # case opening it is relied on to establish the connection -- confirm.
        cursor = connection.cursor()

        try:
            if file_format is None:
                file_format = self.file_format
            if business_data is None:
                business_data = self.business_data
            if mapping is None:
                mapping = self.mapping

            if file_format == 'json':
                reader = ArchesFileReader()
                reader.import_business_data(business_data, mapping)
            elif file_format in ('csv', 'shp', 'zip'):
                if mapping is not None:
                    reader = CsvReader()
                    reader.import_business_data(
                        business_data=business_data,
                        mapping=mapping,
                        overwrite=overwrite,
                        bulk=bulk,
                        create_concepts=create_concepts,
                        create_collections=create_collections)
                else:
                    # Single-argument print(...) behaves the same as the
                    # print statement on Python 2.
                    print('*' * 80)
                    print("ERROR: No mapping file detected. Please indicate one with the '-c' paramater or place one in the same directory as your business data.")
                    print('*' * 80)
                    sys.exit()
            else:
                # Without this branch ``reader`` would still be None when
                # report_errors() is called below.
                raise ValueError(
                    'Unsupported file format: {0!r}'.format(file_format))

            elapsed = (time() - start)
            print('Time to import_business_data = {0}'.format(
                datetime.timedelta(seconds=elapsed)))

            reader.report_errors()

        finally:
            # Runs whether or not the import succeeded: every registered
            # datatype gets its after_update_all() hook called.
            datatype_factory = DataTypeFactory()
            for datatype in DDataType.objects.all():
                datatype_instance = datatype_factory.get_instance(
                    datatype.datatype)
                datatype_instance.after_update_all()
Esempio n. 3
0
    def import_business_data(self, file_format=None, business_data=None, mapping=None, overwrite='append', bulk=False):
        """Import business data with the reader that matches ``file_format``.

        ``file_format``, ``business_data`` and ``mapping`` fall back to the
        attributes of the same name on this instance when passed as ``None``.

        Args:
            file_format: ``'json'`` selects ``ArchesFileReader``; ``'csv'``
                selects ``CsvReader``.
            business_data: Data handed to the selected reader.
            mapping: Mapping handed to the selected reader. ``CsvReader``
                cannot run without one.
            overwrite: Forwarded unchanged to ``CsvReader``.
            bulk: Forwarded unchanged to ``CsvReader``.

        Raises:
            ValueError: If ``file_format`` is neither ``'json'`` nor ``'csv'``.
                (Previously this surfaced as an ``AttributeError`` on ``None``
                when the errors were reported.)
            SystemExit: If ``'csv'`` is requested without a mapping.
        """
        reader = None
        start = time()
        # NOTE(review): this cursor is never used below; the call is kept in
        # case opening it is relied on to establish the connection -- confirm.
        cursor = connection.cursor()

        try:
            if file_format is None:
                file_format = self.file_format
            if business_data is None:
                business_data = self.business_data
            if mapping is None:
                mapping = self.mapping

            if file_format == 'json':
                reader = ArchesFileReader()
                reader.import_business_data(business_data, mapping)
            elif file_format == 'csv':
                if mapping is not None:
                    reader = CsvReader()
                    reader.import_business_data(business_data=business_data, mapping=mapping, overwrite=overwrite, bulk=bulk)
                else:
                    # Single-argument print(...) behaves the same as the
                    # print statement on Python 2.
                    print('*' * 80)
                    print("ERROR: No mapping file detected. Please indicate one with the '-c' paramater or place one in the same directory as your business data.")
                    print('*' * 80)
                    sys.exit()
            else:
                # Without this branch ``reader`` would still be None when
                # report_errors() is called below.
                raise ValueError('Unsupported file format: {0!r}'.format(file_format))

            elapsed = (time() - start)
            print('Time to import_business_data = {0}'.format(datetime.timedelta(seconds=elapsed)))

            reader.report_errors()

        finally:
            # Runs whether or not the import succeeded: every registered
            # datatype gets its after_update_all() hook called.
            datatype_factory = DataTypeFactory()
            for datatype in DDataType.objects.all():
                datatype_instance = datatype_factory.get_instance(datatype.datatype)
                datatype_instance.after_update_all()
Esempio n. 4
0
    def import_business_data(self,
                             file_format=None,
                             business_data=None,
                             mapping=None,
                             overwrite='append',
                             bulk=False,
                             create_concepts=False,
                             create_collections=False,
                             use_multiprocessing=False):
        """Import business data with the reader that matches ``file_format``.

        ``file_format``, ``business_data`` and ``mapping`` fall back to the
        attributes of the same name on this instance when passed as ``None``.

        Args:
            file_format: ``'json'`` and ``'jsonl'`` select
                ``ArchesFileReader``; ``'csv'``, ``'shp'`` and ``'zip'``
                select ``CsvReader``. For ``'jsonl'`` the input is read from
                ``self.file[0]``, one serialized resource per line.
            business_data: Data handed to the selected reader (not used by
                the ``'jsonl'`` branch).
            mapping: Mapping handed to the selected reader. The ``CsvReader``
                formats cannot run without one.
            overwrite: Forwarded unchanged to ``CsvReader``.
            bulk: Forwarded unchanged to ``CsvReader``.
            create_concepts: Forwarded unchanged to ``CsvReader``.
            create_collections: Forwarded unchanged to ``CsvReader``.
            use_multiprocessing: When exactly ``True`` and the format is
                ``'jsonl'``, the lines are distributed over a process pool
                with one worker per CPU.

        Raises:
            ValueError: If ``file_format`` is not one of the formats above.
                (Previously this surfaced as an ``AttributeError`` on ``None``
                when the errors were reported.)
            SystemExit: If a ``CsvReader`` format is requested without a
                mapping.
        """
        reader = None
        start = time()
        # NOTE(review): this cursor is never used below; the call is kept in
        # case opening it is relied on to establish the connection -- confirm.
        cursor = connection.cursor()

        try:
            if file_format is None:
                file_format = self.file_format
            if business_data is None:
                business_data = self.business_data
            if mapping is None:
                mapping = self.mapping

            if file_format == 'json':
                reader = ArchesFileReader()
                reader.import_business_data(business_data, mapping)
            elif file_format == 'jsonl':
                # Read everything up front so the file is released before the
                # import starts. Blank lines (e.g. a trailing newline) carry
                # no resource and are skipped instead of being deserialized.
                with open(self.file[0], 'rU') as openf:
                    lines = [line for line in openf if line.strip()]

                if use_multiprocessing is True:
                    pool = Pool(cpu_count())
                    try:
                        pool.map(import_one_resource, lines)
                    finally:
                        # Always release the worker processes, even when a
                        # worker raised.
                        pool.close()
                        pool.join()
                    connections.close_all()
                    # NOTE(review): this reader did not take part in the
                    # import, so report_errors() below is called on a fresh
                    # instance rather than on the workers' readers -- confirm
                    # that worker errors are reported elsewhere.
                    reader = ArchesFileReader()
                else:
                    reader = ArchesFileReader()
                    for line in lines:
                        archesresource = JSONDeserializer().deserialize(
                            line)
                        reader.import_business_data(
                            {"resources": [archesresource]})
            elif file_format in ('csv', 'shp', 'zip'):
                if mapping is not None:
                    reader = CsvReader()
                    reader.import_business_data(
                        business_data=business_data,
                        mapping=mapping,
                        overwrite=overwrite,
                        bulk=bulk,
                        create_concepts=create_concepts,
                        create_collections=create_collections)
                else:
                    # Single-argument print(...) behaves the same as the
                    # print statement on Python 2.
                    print('*' * 80)
                    print("ERROR: No mapping file detected. Please indicate one with the '-c' paramater or place one in the same directory as your business data.")
                    print('*' * 80)
                    sys.exit()
            else:
                # Without this branch ``reader`` would still be None when
                # report_errors() is called below.
                raise ValueError(
                    'Unsupported file format: {0!r}'.format(file_format))

            elapsed = (time() - start)
            print('Time to import_business_data = {0}'.format(
                datetime.timedelta(seconds=elapsed)))

            reader.report_errors()

        finally:
            # Runs whether or not the import succeeded: every registered
            # datatype gets its after_update_all() hook called.
            datatype_factory = DataTypeFactory()
            for datatype in DDataType.objects.all():
                datatype_instance = datatype_factory.get_instance(
                    datatype.datatype)
                datatype_instance.after_update_all()