Ejemplo n.º 1
0
def copy_file(model, file_path=None, **kwargs):
    """Bulk-load a CSV file into *model*'s database table.

    Tries the fast server-side COPY path first; on any failure it falls
    back to batched upserts so a single bad row does not abort the whole
    import.

    Args:
        model: Django model whose ``_meta.db_table`` names the target table.
        file_path: path to the CSV file to load.
        **kwargs: forwarded to the seeding helpers; must contain ``update``,
            which is passed to ``from_csv_file_to_gen``.
    """
    table_name = model._meta.db_table
    # NOTE(review): the original also opened the file to build a COPY query
    # via copy_query(), but that query was never used — dead code removed.
    try:
        # Fast path: bulk COPY of the whole file.
        copy_insert_from_csv(table_name, file_path, **kwargs)
    except Exception as e:
        # Best-effort fallback: seed row batches instead of failing outright.
        logger.warning("Database - Bulk Import Error - beginning Batch seeding. Error: {}".format(e))
        rows = from_csv_file_to_gen(file_path, kwargs['update'])
        batch_upsert_from_gen(model, rows, settings.BATCH_SIZE, **kwargs)
Ejemplo n.º 2
0
def seed_from_csv_diff(original_file_path, new_file_path, model, **kwargs):
    """
    takes new file, filters it down in size, adds to Set()
    takes old file, adds to Set()
    saves to temporary file for read to avoid high memory usage

    Diff Set() = New file Set() - Old file Set()
     - preserves new records
     - preserves altered/updated records
     - removes duplicate, non updated records

    seeds Diff Set() in batches

    Args:
        original_file_path: path to the previously-seeded CSV.
        new_file_path: path to the freshly-downloaded CSV.
        model: Django model; must provide ``update_set_filter``.
        **kwargs: forwarded to ``batch_upsert_from_gen``; must contain
            ``update``; may contain ``callback``, invoked on completion.
    """
    original_diff_set = set()
    new_diff_set = set()
    logger.debug(" * Beginning CSV diff process.")

    # Rows are serialized to JSON strings so they are hashable set members.
    # Files are opened with `with` so handles are closed even on error
    # (the original leaked both handles).
    with open(new_file_path, 'r') as new_file:
        headers = new_file.readline().replace('\n', '').split(',')
        new_reader = model.update_set_filter(csv.reader(new_file), headers)
        for row in new_reader:
            new_diff_set.add(json.dumps(row))

    with open(original_file_path, 'r') as original_file:
        original_reader = csv.reader(original_file)
        next(original_reader, None)  # skip the header row
        for row in original_reader:
            original_diff_set.add(json.dumps(row))

    diff = new_diff_set - original_diff_set

    # BUG FIX: the original ternary bound as
    # `(str('set_diff' + ...) + '.mock') if settings.TESTING else '.csv'`,
    # so outside of tests the temp file was literally named ".csv" with no
    # unique random suffix. Build prefix and extension explicitly instead.
    extension = '.mock' if settings.TESTING else '.csv'
    temp_file_path = os.path.join(
        settings.MEDIA_TEMP_ROOT,
        'set_diff' + str(random.randint(1, 10000000)) + extension)

    with open(temp_file_path, 'w') as temp_file:
        writer = csv.writer(temp_file, delimiter=',')
        writer.writerow(headers)
        for row in diff:
            writer.writerow(json.loads(row))

    try:
        diff_gen = from_csv_file_to_gen(temp_file_path, kwargs['update'])
        logger.debug(" * Csv diff completed, beginning batch upsert.")
        batch_upsert_from_gen(model, diff_gen, settings.BATCH_SIZE, **kwargs)
    finally:
        # Always clean up the temp file, even if the upsert raises.
        if os.path.isfile(temp_file_path):
            os.remove(temp_file_path)

    if 'callback' in kwargs and kwargs['callback']:
        kwargs['callback']()
Ejemplo n.º 3
0
 def transform_self(self, file_path, update=None):
     """Yield pre-validated rows read from *file_path*."""
     row_gen = from_csv_file_to_gen(file_path, update)
     return self.pre_validation_filters(row_gen)
Ejemplo n.º 4
0
 def transform_self(self, file_path, update=None):
     """Yield pre-validated rows from *file_path*, each augmented with a
     BBL derived from the 'boro' column."""
     row_gen = from_csv_file_to_gen(file_path, update)
     rows_with_bbl = with_bbl(row_gen, borough='boro')
     return self.pre_validation_filters(rows_with_bbl)
Ejemplo n.º 5
0
 def transform_self(self, file_path, update=None):
     """Yield pre-validated rows from *file_path*, augmented with a BBL;
     rows with blank BBL components are allowed through."""
     row_gen = from_csv_file_to_gen(file_path, update)
     rows_with_bbl = with_bbl(row_gen, allow_blank=True)
     return self.pre_validation_filters(rows_with_bbl)
Ejemplo n.º 6
0
 def transform_self(self, file_path, update=None):
     """Yield pre-validated rows from *file_path*, cleaning null bytes
     out of the headers while reading."""
     row_gen = from_csv_file_to_gen(
         file_path, update, self.clean_null_bytes_headers)
     return self.pre_validation_filters(row_gen)
Ejemplo n.º 7
0
 def transform_self(self, file_path, update=None):
     """Yield rows from *file_path* augmented with a BBL (no
     pre-validation filtering for this dataset)."""
     row_gen = from_csv_file_to_gen(file_path, update)
     return with_bbl(row_gen)