def test_field_value_mapping_and_types(self):
    """CSV values are translated through field_value_mapping, with the
    NUMBER column copied as text before conversion."""
    copier = CopyMapping(
        BasicMockObject,
        self.mapping_path,
        {'name': 'NAME', 'number': 'NUMBER', 'dt': 'DATE'},
        field_value_mapping={
            'name': {
                'ben': 'Master Ben',
                'joe': 'Padawan Joe',
                'jane': 'Jedi Jane',
            },
            'number': {'seven': 7, 'three': 3, 'five': 5},
        },
        field_copy_types={'number': 'text'},
    )
    copier.save()
    self.assertEqual(BasicMockObject.objects.count(), 3)
    rows = list(
        BasicMockObject.objects.order_by('name').values_list('name', 'number')
    )
    self.assertEqual(
        rows,
        [('Jedi Jane', 5), ('Master Ben', 7), ('Padawan Joe', 3)],
    )
    self.assertEqual(
        BasicMockObject.objects.get(name='Master Ben').dt,
        date(2012, 1, 1),
    )
def test_field_value_mapping_and_types(self):
    """Verify value translation via field_value_mapping together with a
    text copy-type override for the numeric column."""
    value_maps = {
        'name': {
            'ben': 'Master Ben',
            'joe': 'Padawan Joe',
            'jane': 'Jedi Jane',
        },
        'number': {'seven': 7, 'three': 3, 'five': 5},
    }
    loader = CopyMapping(
        BasicMockObject,
        self.mapping_path,
        dict(name='NAME', number='NUMBER', dt='DATE'),
        field_value_mapping=value_maps,
        field_copy_types={'number': 'text'},
    )
    loader.save()
    self.assertEqual(BasicMockObject.objects.count(), 3)
    observed = BasicMockObject.objects.order_by('name').values_list('name', 'number')
    self.assertEqual(
        list(observed),
        [('Jedi Jane', 5), ('Master Ben', 7), ('Padawan Joe', 3)],
    )
    self.assertEqual(
        BasicMockObject.objects.get(name='Master Ben').dt,
        date(2012, 1, 1),
    )
def insertData(apps, schema_editor):
    """Data migration: load Mercado Continuo symbols from CSV, add the two
    index symbols, then register invertia quote/dividend sources and
    quantmod split sources for each symbol.

    Fixes: the "not found" print string previously contained a raw line
    break and the typo "Cotinuing"; the unused ``quote_sources`` list
    was removed.
    """
    lang = Lang(code='es', name='Español')
    lang.save()
    curdir = os.path.dirname(os.path.realpath(__file__))
    path = os.path.normpath(os.path.join(
        curdir, os.pardir, os.pardir, 'data', 'mercado_continuo.csv'))
    c = CopyMapping(
        Symbol,
        path,
        dict(name='nombre', ticker='ticker'),
        static_mapping={'market': 'Mercado Continuo', 'type': 'stock'},
    )
    c.save()
    # The two index symbols are not in the CSV, so create them directly.
    Symbol(ticker='IBEX35', name='IBEX 35', market='Mercado Continuo',
           type=Symbol.INDEX).save()
    Symbol(ticker='IBEXTR', name='IBEX Total Return', market='Mercado Continuo',
           type=Symbol.INDEX).save()
    invertia_quote_tpl = 'https://www.invertia.com/es/mercados/bolsa/empresas/historico?p_p_id=cotizacioneshistoricas_WAR_ivfrontmarketsportlet&p_p_lifecycle=2&p_p_state=normal&p_p_mode=view&p_p_resource_id=exportExcel&p_p_cacheability=cacheLevelPage&p_p_col_id=column-1&p_p_col_pos=1&p_p_col_count=2&_cotizacioneshistoricas_WAR_ivfrontmarketsportlet_startDate={{startDate}}&_cotizacioneshistoricas_WAR_ivfrontmarketsportlet_endDate={{endDate}}&_cotizacioneshistoricas_WAR_ivfrontmarketsportlet_idtel={invertia_key}'
    invertia_dividend_tpl = 'https://www.invertia.com/es/mercados/bolsa/empresas/dividendos/-/empresa/{empresa}/{invertia_key}'
    curdir = os.path.dirname(os.path.realpath(__file__))
    path = os.path.normpath(os.path.join(
        curdir, os.pardir, os.pardir, 'data', 'invertia_claves.csv'))
    # CSV rows: ticker, company slug ("empresa"), invertia key.
    invertia_keys = csv.reader(open(path))
    next(invertia_keys, None)  # skip the headers
    for row in invertia_keys:
        quote_url = invertia_quote_tpl.format(invertia_key=row[2])
        dividend_url = invertia_dividend_tpl.format(
            empresa=row[1], invertia_key=row[2])
        try:
            symbol_id = Symbol.objects.get(ticker=row[0]).id
            SymbolSource(name='invertia', symbol_id=symbol_id, key=quote_url,
                         type=SymbolSource.QUOTE).save()
            SymbolSource(name='invertia', symbol_id=symbol_id, key=dividend_url,
                         type=SymbolSource.DIVIDEND).save()
        except Symbol.DoesNotExist:
            print('Symbol with ticker', row[0], "(%s)" % row[1],
                  'not found. Continuing...')
            continue
    # Every non-index Mercado Continuo symbol gets a quantmod split source.
    mcs = Symbol.objects.filter(market='Mercado Continuo').exclude(
        ticker__in=('IBEX35', 'IBEXTR'))
    for symbol in mcs:
        SymbolSource(name='quantmod', symbol_id=symbol.id,
                     key='%s.mc' % symbol.ticker,
                     type=SymbolSource.SPLIT).save()
def test_limited_save(self):
    """A mapping covering only some model fields still loads all rows."""
    copier = CopyMapping(
        LimitedMockObject,
        self.name_path,
        {'name': 'NAME', 'dt': 'DATE'},
    )
    copier.save()
    self.assertEqual(LimitedMockObject.objects.count(), 3)
    self.assertEqual(
        LimitedMockObject.objects.get(name='BEN').dt,
        date(2012, 1, 1),
    )
def handle(self, *args, **options):
    """Load every CSV under BASE_DIR/data into the Incident table."""
    data = os.path.join(settings.BASE_DIR, "data")
    for filepath in list(all_files(data, "*.csv")):
        log("Opening file {}\n".format(filepath), "cyan")
        log(" Loading data ...\n")
        # One geocoded-incident column per model field.
        column_map = dict(
            date="IncidentDate",
            year="Year",
            raw_location="Location",
            address="Edited Street Address",
            city_and_state="City and State",
            full_address="Edited Full Address",
            latitude="Latitude",
            longitude="Longitude",
            accuracy_score="Accuracy Score",
            accuracy_type="Accuracy Type",
            number="Number",
            street="Street",
            city="City",
            state="State",
            county="County",
            zipcode="Zip",
        )
        CopyMapping(Incident, filepath, column_map).save()
        log(" Data loaded!\n", "green")
def handle(self, *args, **options):
    """Bulk-load all incident CSV files found under the data directory."""
    data_dir = os.path.join(settings.BASE_DIR, 'data')
    csv_paths = list(all_files(data_dir, '*.csv'))
    for csv_path in csv_paths:
        log('Opening file {}\n'.format(csv_path), 'cyan')
        log(' Loading data ...\n')
        importer = CopyMapping(
            Incident,
            csv_path,
            {
                'date': 'IncidentDate',
                'year': 'Year',
                'raw_location': 'Location',
                'address': 'Edited Street Address',
                'city_and_state': 'City and State',
                'full_address': 'Edited Full Address',
                'latitude': 'Latitude',
                'longitude': 'Longitude',
                'accuracy_score': 'Accuracy Score',
                'accuracy_type': 'Accuracy Type',
                'number': 'Number',
                'street': 'Street',
                'city': 'City',
                'state': 'State',
                'county': 'County',
                'zipcode': 'Zip',
            },
        )
        importer.save()
        log(' Data loaded!\n', 'green')
def test_save_steps(self):
    """Walk through CopyMapping's individual steps — create, copy, insert,
    drop — verifying row counts at each stage.

    Fix: replaced the deprecated ``assertEquals`` alias with ``assertEqual``
    (removed in Python 3.12).
    """
    c = CopyMapping(
        MockObject,
        self.name_path,
        dict(name='NAME', number='NUMBER', dt='DATE'),
    )
    cursor = c.conn.cursor()
    # create(): temp table exists but both tables are empty.
    c.create(cursor)
    cursor.execute("""SELECT count(*) FROM %s;""" % c.temp_table_name)
    self.assertEqual(cursor.fetchone()[0], 0)
    cursor.execute("""SELECT count(*) FROM %s;""" % c.model._meta.db_table)
    self.assertEqual(cursor.fetchone()[0], 0)
    # copy(): CSV rows land in the temp table only.
    c.copy(cursor)
    cursor.execute("""SELECT count(*) FROM %s;""" % c.temp_table_name)
    self.assertEqual(cursor.fetchone()[0], 3)
    cursor.execute("""SELECT count(*) FROM %s;""" % c.model._meta.db_table)
    self.assertEqual(cursor.fetchone()[0], 0)
    # insert(): rows move into the model's real table.
    c.insert(cursor)
    cursor.execute("""SELECT count(*) FROM %s;""" % c.model._meta.db_table)
    self.assertEqual(cursor.fetchone()[0], 3)
    # drop(): the temp table is removed.
    c.drop(cursor)
    self.assertEqual(cursor.statusmessage, 'DROP TABLE')
    cursor.close()
def test_bad_static_values(self):
    """A static_mapping key that is not a model field raises ValueError."""
    with self.assertRaises(ValueError):
        copier = CopyMapping(
            ExtendedMockObject,
            self.name_path,
            {'name': 'NAME', 'number': 'NUMBER', 'dt': 'DATE'},
            encoding='UTF-8',
            static_mapping={'static_bad': 1},
        )
        copier.save()
def handle(self, *args, **kwargs):
    """Load the forum CSV (path given as the second positional argument)."""
    copier = CopyMapping(
        Forum,                               # the target model
        args[1],                             # path to the CSV file
        {'article_title': 'article_title'},  # model field -> CSV header
    )
    copier.save()
def test_bad_static_values(self):
    """Saving with an unknown static_mapping field must fail."""
    with self.assertRaises(ValueError):
        bad = CopyMapping(
            ExtendedMockObject,
            self.name_path,
            dict(name='NAME', number='NUMBER', dt='DATE'),
            encoding='UTF-8',
            static_mapping=dict(static_bad=1),
        )
        bad.save()
def test_encoding_save(self):
    """Loading with an explicit encoding still handles NULL values and dates."""
    copier = CopyMapping(
        MockObject,
        self.null_path,
        {'name': 'NAME', 'number': 'NUMBER', 'dt': 'DATE'},
        encoding='UTF-8',
    )
    copier.save()
    self.assertEqual(MockObject.objects.count(), 5)
    self.assertEqual(MockObject.objects.get(name='BADBOY').number, None)
    self.assertEqual(
        MockObject.objects.get(name='BEN').dt,
        date(2012, 1, 1),
    )
def test_save_foreign_key(self):
    """The PARENT column populates the foreign-key id directly."""
    copier = CopyMapping(
        MockObject,
        self.foreign_path,
        {'name': 'NAME', 'number': 'NUMBER', 'dt': 'DATE', 'parent': 'PARENT'},
    )
    copier.save()
    self.assertEqual(MockObject.objects.count(), 3)
    self.assertEqual(MockObject.objects.get(name='BEN').parent_id, 4)
    self.assertEqual(
        MockObject.objects.get(name='BEN').dt,
        date(2012, 1, 1),
    )
def test_static_values(self):
    """static_mapping constants are applied to every inserted row."""
    copier = CopyMapping(
        ExtendedMockObject,
        self.name_path,
        {'name': 'NAME', 'number': 'NUMBER', 'dt': 'DATE'},
        static_mapping={'static_val': 1, 'static_string': 'test'},
    )
    copier.save()
    self.assertEqual(
        ExtendedMockObject.objects.filter(static_val=1).count(), 3)
    self.assertEqual(
        ExtendedMockObject.objects.filter(static_string='test').count(), 3)
def handle(self, *args, **kwargs):
    """Load the Person CSV at the hard-coded path."""
    copier = CopyMapping(
        Person,                                        # the target model
        '/path/to/my/data.csv',                        # path to the CSV
        {'name': 'NAME', 'number': 'NUMBER', 'dt': 'DATE'},  # field -> header
    )
    copier.save()
def test_silent_save(self):
    """save(silent=True) loads all rows without printing progress."""
    copier = CopyMapping(
        MockObject,
        self.name_path,
        {'name': 'NAME', 'number': 'NUMBER', 'dt': 'DATE'},
    )
    copier.save(silent=True)
    self.assertEqual(MockObject.objects.count(), 3)
    self.assertEqual(MockObject.objects.get(name='BEN').number, 1)
    self.assertEqual(
        MockObject.objects.get(name='BEN').dt,
        date(2012, 1, 1),
    )
def test_limited_save(self):
    """Loading with a partial field mapping succeeds."""
    mapping = {'name': 'NAME', 'dt': 'DATE'}
    CopyMapping(LimitedMockObject, self.name_path, mapping).save()
    self.assertEqual(LimitedMockObject.objects.count(), 3)
    self.assertEqual(
        LimitedMockObject.objects.get(name='BEN').dt,
        date(2012, 1, 1),
    )
def test_pipe_save(self):
    """A pipe-delimited CSV loads correctly with delimiter='|'."""
    copier = CopyMapping(
        MockObject,
        self.pipe_path,
        {'name': 'NAME', 'number': 'NUMBER', 'dt': 'DATE'},
        delimiter="|",
    )
    copier.save()
    self.assertEqual(MockObject.objects.count(), 3)
    self.assertEqual(MockObject.objects.get(name='BEN').number, 1)
    self.assertEqual(
        MockObject.objects.get(name='BEN').dt,
        date(2012, 1, 1),
    )
def test_field_override_save(self):
    """Empty CSV cells become NULL in the overridden number field."""
    copier = CopyMapping(
        MockObject,
        self.null_path,
        {'name': 'NAME', 'number': 'NUMBER', 'dt': 'DATE'},
    )
    copier.save()
    self.assertEqual(MockObject.objects.count(), 5)
    self.assertEqual(MockObject.objects.get(name='BADBOY').number, None)
    self.assertEqual(
        MockObject.objects.get(name='BEN').dt,
        date(2012, 1, 1),
    )
def handle(self, *args, **options):
    """Refresh SalaryRecord from berkeley_faculty.csv (full reload)."""
    SalaryRecord.objects.all().delete()
    clean_file_path = os.path.join(settings.DATA_DIR, 'berkeley_faculty.csv')
    # CSV headers match model field names 1:1; skip the auto pk.
    mapping = {
        field.name: field.name
        for field in SalaryRecord._meta.get_fields()
        if field.name != 'id'
    }
    CopyMapping(SalaryRecord, clean_file_path, mapping).save()
def test_silent_save(self):
    """Silent mode must not change what gets loaded."""
    loader = CopyMapping(
        MockObject,
        self.name_path,
        dict(name='NAME', number='NUMBER', dt='DATE'),
    )
    loader.save(silent=True)
    self.assertEqual(MockObject.objects.count(), 3)
    ben = MockObject.objects.get(name='BEN')
    self.assertEqual(ben.number, 1)
    self.assertEqual(ben.dt, date(2012, 1, 1))
def test_pipe_save(self):
    """Pipe-delimited input loads like comma-delimited input."""
    loader = CopyMapping(
        MockObject,
        self.pipe_path,
        dict(name='NAME', number='NUMBER', dt='DATE'),
        delimiter="|",
    )
    loader.save()
    self.assertEqual(MockObject.objects.count(), 3)
    ben = MockObject.objects.get(name='BEN')
    self.assertEqual(ben.number, 1)
    self.assertEqual(ben.dt, date(2012, 1, 1))
def handle(self, *args, **kwargs):
    """Load the crime-category and target-place ancillary CSVs."""
    ccpath = kwargs['crimeCatPath']
    tppath = kwargs['targetPlacePath']
    # ccpath = '.../ctype2cc_170627.csv'
    print('loadAncil: crimeCat from %s...' % (ccpath))
    # CSV header = CType, CC
    crime_copier = CopyMapping(
        CrimeCat,
        ccpath,
        {'ctypeDesc': 'CType', 'crimeCat': 'CC'},
    )
    crime_copier.save()
    print('loadAncil: NCrimeCat=%d' % (CrimeCat.objects.all().count()))
    # tppath = '.../targetPlace.csv'
    print('loadAncil: targetPlace from %s ...' % (tppath))
    # CSV header = placeType, ylat, xlng, name, desc
    place_copier = CopyMapping(
        TargetPlace,
        tppath,
        {'placeType': 'placeType', 'ylat': 'ylat', 'xlng': 'xlng',
         'name': 'name', 'desc': 'desc'},
    )
    place_copier.save()
    print('loadAncil: NTargetPlace=%d' % (TargetPlace.objects.all().count()))
def test_static_mapping_ignore_non_mapped_headers(self):
    """Unmapped CSV headers are ignored and the static parent is applied
    to every imported child row."""
    seed = CopyMapping(
        BasicMockObject,
        self.name_path,
        {'name': 'NAME', 'number': 'NUMBER', 'dt': 'DATE'},
    )
    seed.save()
    self.assertEqual(BasicMockObject.objects.count(), 3)
    children = CopyMapping(
        BasicMockObject,
        self.fk_mapping_path,
        {'name': 'NAME', 'number': 'NUMBER', 'dt': 'DATE'},
        static_mapping={'parent': BasicMockObject.objects.get(name='ben').pk},
        ignore_non_mapped_headers=True,
    )
    children.save()
    self.assertEqual(BasicMockObject.objects.count(), 6)
    observed = BasicMockObject.objects.order_by('name').values_list(
        'name', 'parent__name')
    self.assertEqual(
        list(observed),
        [('ben', None), ('ben junior', 'ben'), ('jane', None),
         ('jane junior', 'ben'), ('joe', None), ('joe junior', 'ben')],
    )
def test_foreignkey_mapping(self):
    """PARENT names are resolved to primary keys via field_value_mapping."""
    seed = CopyMapping(
        BasicMockObject,
        self.name_path,
        {'name': 'NAME', 'number': 'NUMBER', 'dt': 'DATE'},
    )
    seed.save()
    self.assertEqual(BasicMockObject.objects.count(), 3)
    # Map each parent's CSV name to the pk created by the first load.
    parent_pks = {
        'ben': BasicMockObject.objects.get(name='ben').pk,
        'joe': BasicMockObject.objects.get(name='joe').pk,
        'jane': BasicMockObject.objects.get(name='jane').pk,
    }
    children = CopyMapping(
        BasicMockObject,
        self.fk_mapping_path,
        {'name': 'NAME', 'number': 'NUMBER', 'dt': 'DATE', 'parent': 'PARENT'},
        field_value_mapping={'parent': parent_pks},
        field_copy_types={'parent': 'text'},
    )
    children.save()
    self.assertEqual(BasicMockObject.objects.count(), 6)
    observed = BasicMockObject.objects.order_by('name').values_list(
        'name', 'parent__name')
    self.assertEqual(
        list(observed),
        [('ben', None), ('ben junior', 'ben'), ('jane', None),
         ('jane junior', 'jane'), ('joe', None), ('joe junior', 'joe')],
    )
def test_save_foreign_key(self):
    """Raw foreign-key ids in the CSV are stored on parent_id."""
    loader = CopyMapping(
        MockObject,
        self.foreign_path,
        dict(name='NAME', number='NUMBER', dt='DATE', parent='PARENT'),
    )
    loader.save()
    self.assertEqual(MockObject.objects.count(), 3)
    ben = MockObject.objects.get(name='BEN')
    self.assertEqual(ben.parent_id, 4)
    self.assertEqual(ben.dt, date(2012, 1, 1))
def test_static_values(self):
    """Every row receives both static_mapping constants."""
    loader = CopyMapping(
        ExtendedMockObject,
        self.name_path,
        dict(name='NAME', number='NUMBER', dt='DATE'),
        static_mapping=dict(static_val=1, static_string='test'),
    )
    loader.save()
    self.assertEqual(
        ExtendedMockObject.objects.filter(static_val=1).count(),
        3,
    )
    self.assertEqual(
        ExtendedMockObject.objects.filter(static_string='test').count(),
        3,
    )
def test_bad_header(self):
    """A mapping that names a nonexistent CSV header raises ValueError."""
    bad_mapping = {'name': 'NAME1', 'number': 'NUMBER', 'dt': 'DATE'}
    with self.assertRaises(ValueError):
        CopyMapping(MockObject, self.name_path, bad_mapping)
def test_missing_overload_field(self):
    """Mapping a field the model lacks raises FieldDoesNotExist."""
    bad_mapping = {'name': 'NAME', 'number': 'NUMBER',
                   'dt': 'DATE', 'missing': 'NAME'}
    with self.assertRaises(FieldDoesNotExist):
        CopyMapping(OverloadMockObject, self.name_path, bad_mapping)
def test_bad_field(self):
    """A mapping keyed on an unknown model field raises FieldDoesNotExist."""
    bad_mapping = {'name1': 'NAME', 'number': 'NUMBER', 'dt': 'DATE'}
    with self.assertRaises(FieldDoesNotExist):
        CopyMapping(MockObject, self.name_path, bad_mapping)
def test_bad_csv(self):
    """A nonexistent CSV path raises ValueError."""
    with self.assertRaises(ValueError):
        CopyMapping(
            MockObject,
            '/foobar.csv',
            {'name': 'NAME', 'number': 'NUMBER', 'dt': 'DATE'},
            using='sqlite',
        )
def test_bad_backend(self):
    """A non-PostgreSQL database alias raises TypeError."""
    with self.assertRaises(TypeError):
        CopyMapping(
            MockObject,
            self.name_path,
            {'name': 'NAME', 'number': 'NUMBER', 'dt': 'DATE'},
            using='sqlite',
        )
def load_postgresql(self):
    """
    Load the file into a PostgreSQL database using COPY
    """
    # Wipe the target model's table before loading.
    self.cursor.execute(
        'TRUNCATE TABLE "%s" CASCADE' % (self.model._meta.db_table))
    # Map each model field to its database column.
    field_map = dict((f.name, f.db_column) for f in self.model._meta.fields)
    copier = CopyMapping(self.model, self.csv, field_map, using=self.database)
    copier.save(silent=True)
    # Print out the results
    if self.verbosity > 2:
        csv_count = self.get_row_count()
        model_count = self.model.objects.count()
        self.finish_load_message(model_count, csv_count)
def test_overload_save(self):
    """overloaded_mapping fills lower_name from the same NAME column."""
    loader = CopyMapping(
        OverloadMockObject,
        self.name_path,
        dict(name='NAME', number='NUMBER', dt='DATE'),
        overloaded_mapping=dict(lower_name='NAME'),
    )
    loader.save()
    self.assertEqual(OverloadMockObject.objects.count(), 3)
    self.assertEqual(OverloadMockObject.objects.get(name='BEN').number, 1)
    self.assertEqual(
        OverloadMockObject.objects.get(lower_name='ben').number, 1)
    self.assertEqual(
        OverloadMockObject.objects.get(name='BEN').dt,
        date(2012, 1, 1),
    )
    first = OverloadMockObject.objects.first()
    self.assertEqual(first.name.lower(), first.lower_name)
def csv_to_db(self):
    """Build (but do not run) a CopyMapping for the taxonomy file.

    save() is intentionally not called; the printed message reminds the
    operator to enable it.

    Fix: converted the Python 2 ``print msg`` statement to the
    ``print()`` function so the module parses under Python 3.
    """
    raw_contents = self.get_raw_file('taxonomy')
    model = raw_contents.get('model')[0]  # somehow we're getting a tuple
    path = raw_contents.get('path')
    raw_dict = raw_contents.get('raw')
    obj = CopyMapping(model, path, raw_dict)
    msg = 'CopyMapping incomplete: uncomment save() to make db changes'
    print(msg)
def load_postgresql(self, model, csv_path):
    """
    Takes a model and a csv_path and loads it into postgresql
    """
    # Empty the real table first so the load is a full refresh.
    self.cursor.execute(
        'TRUNCATE TABLE "%s" CASCADE' % (model._meta.db_table))
    column_map = dict((f.name, f.db_column) for f in model._meta.fields)
    CopyMapping(model, csv_path, column_map).save(silent=True)
    # Print out the results
    csv_count = self.get_row_count(csv_path)
    model_count = model.objects.count()
    self.finish_load_message(model_count, csv_count)
def load_postgresql(self, model, csv_path):
    """
    Takes a model and a csv_path and loads it into postgresql
    """
    # Drop all the records from the target model's real table
    table = model._meta.db_table
    self.cursor.execute('TRUNCATE TABLE "%s" CASCADE' % (table))
    mapper = CopyMapping(
        model,
        csv_path,
        dict((f.name, f.db_column) for f in model._meta.fields),
    )
    mapper.save(silent=True)
    # Report how the CSV row count compares to the loaded table.
    csv_count = self.get_row_count(csv_path)
    model_count = model.objects.count()
    self.finish_load_message(model_count, csv_count)
def load_postgresql(self):
    """
    Load the file into a PostgreSQL database using COPY
    """
    # Full refresh: truncate before loading.
    table = self.model._meta.db_table
    self.cursor.execute('TRUNCATE TABLE "%s" CASCADE' % (table))
    mapper = CopyMapping(
        self.model,
        self.csv,
        dict((f.name, f.db_column) for f in self.model._meta.fields),
        using=self.database,
    )
    mapper.save(silent=True)
    # Only report counts at high verbosity.
    if self.verbosity > 2:
        self.finish_load_message(
            self.model.objects.count(), self.get_row_count())
def test_overload_save(self):
    """One CSV column can feed several model fields (name, lower, upper)."""
    copier = CopyMapping(
        OverloadMockObject,
        self.name_path,
        {'name': 'NAME', 'lower_name': 'NAME', 'upper_name': 'NAME',
         'number': 'NUMBER', 'dt': 'DATE'},
    )
    copier.save()
    self.assertEqual(OverloadMockObject.objects.count(), 3)
    self.assertEqual(OverloadMockObject.objects.get(name='ben').number, 1)
    self.assertEqual(
        OverloadMockObject.objects.get(lower_name='ben').number, 1)
    self.assertEqual(
        OverloadMockObject.objects.get(upper_name='BEN').number, 1)
    self.assertEqual(
        OverloadMockObject.objects.get(name='ben').dt,
        date(2012, 1, 1),
    )
    omo = OverloadMockObject.objects.first()
    self.assertEqual(omo.name.lower(), omo.lower_name)
def test_foreignkey_mapping(self):
    """A second load links children to parents created by the first."""
    CopyMapping(
        BasicMockObject,
        self.name_path,
        dict(name='NAME', number='NUMBER', dt='DATE'),
    ).save()
    self.assertEqual(BasicMockObject.objects.count(), 3)
    lookup = {
        name: BasicMockObject.objects.get(name=name).pk
        for name in ('ben', 'joe', 'jane')
    }
    CopyMapping(
        BasicMockObject,
        self.fk_mapping_path,
        dict(name='NAME', number='NUMBER', dt='DATE', parent='PARENT'),
        field_value_mapping={'parent': lookup},
        field_copy_types={'parent': 'text'},
    ).save()
    self.assertEqual(BasicMockObject.objects.count(), 6)
    pairs = BasicMockObject.objects.order_by('name').values_list(
        'name', 'parent__name')
    self.assertEqual(
        list(pairs),
        [('ben', None), ('ben junior', 'ben'), ('jane', None),
         ('jane junior', 'jane'), ('joe', None), ('joe junior', 'joe')],
    )
def import_csv(self, request):
    """Bulk-load Concept rows from an uploaded CSV; always deletes the
    temporary copy of the upload."""
    if 'csv' not in request.FILES:
        return JsonResponse(
            status=status.HTTP_400_BAD_REQUEST,
            data={'success': False, 'errors': ["Missing 'csv' parameter"]},
        )
    upload = request.FILES['csv']
    file_path = "/tmp/{0}.csv".format(str(uuid.uuid4()))
    with open(file_path, 'wb+') as destination:
        for chunk in upload.chunks():
            destination.write(chunk)
    try:
        CopyMapping(
            models.Concept,
            file_path,
            ConceptViewSet.CSV_COLUMN_MAPPING,
        ).save()
    except (ValueError, DatabaseError):
        expected_columns = ', '.join(ConceptViewSet.CSV_COLUMN_MAPPING.keys())
        return JsonResponse(
            status=status.HTTP_400_BAD_REQUEST,
            data={
                'success': False,
                'errors': ['CSV import error. Expected columns are {0}'.format(expected_columns)],
            },
        )
    finally:
        os.remove(file_path)
    return JsonResponse({'success': True})
def import_csv(self, request):
    """Accept an uploaded CSV, stage it in /tmp, and COPY it into Concept."""
    if 'csv' not in request.FILES:
        errors = ["Missing 'csv' parameter"]
        return JsonResponse(
            status=status.HTTP_400_BAD_REQUEST,
            data={'success': False, 'errors': errors},
        )
    uploaded = request.FILES['csv']
    file_path = "/tmp/{0}.csv".format(str(uuid.uuid4()))
    with open(file_path, 'wb+') as destination:
        for chunk in uploaded.chunks():
            destination.write(chunk)
    try:
        mapper = CopyMapping(
            models.Concept, file_path, ConceptViewSet.CSV_COLUMN_MAPPING)
        mapper.save()
    except (ValueError, DatabaseError):
        expected_columns = ', '.join(ConceptViewSet.CSV_COLUMN_MAPPING.keys())
        message = 'CSV import error. Expected columns are {0}'.format(
            expected_columns)
        return JsonResponse(
            status=status.HTTP_400_BAD_REQUEST,
            data={'success': False, 'errors': [message]},
        )
    finally:
        # Remove the staged file whether or not the import succeeded.
        os.remove(file_path)
    return JsonResponse({'success': True})
def insertRecords(requet):
    """Load file.csv into the Product model via CopyMapping, mirror the rows
    into test_table with psycopg2, read them back, and render insert.html.

    Fixes: csv.reader was given a binary-mode handle (``'rb'``), which
    raises on Python 3 — opened in text mode with ``newline=''`` instead;
    the CSV file handle was never closed — now a ``with`` block.
    """
    # save the data into model of Django
    c = CopyMapping(
        Product,
        'file.csv',
        dict(category='category', name='name', image='image', price='price'),
    )
    c.save()
    # save the record into postgresql database
    conn = psycopg2.connect("dbname='testdb' user='******' host='localhost' password='******'")
    cur = conn.cursor()
    filesource = 'file.csv'
    with open(filesource, 'r', newline='') as source:
        reader = csv.reader(source)
        next(reader, None)  # skip the header row (replaces the i==0 flag)
        for row in reader:
            # NOTE(review): the column list (category, image, price, name)
            # vs the positional row values looks inconsistent with the
            # CopyMapping order above — confirm against the CSV layout.
            cur.execute(
                "INSERT INTO test_table (category, image, price, name) VALUES(%s, %s, %s, %s)",
                (row[0], row[1], row[2], row[3]))
    conn.commit()
    conn.close()
    # used for accesing data from postgresql database
    conn = psycopg2.connect("dbname='testdb' user='******' host='localhost' password='******'")
    cur = conn.cursor()
    cur.execute(""" select * from test_table """)
    results = []
    rows = cur.fetchall()
    for row in rows:
        results.append(row)
    return render(requet, 'insert.html', {})
def handle(self, *args, **options):
    """Import OHLCV CSVs — one per symbol, or only the requested ticker."""
    if options['ticker']:
        symbols = [Symbol.objects.get(ticker=options['ticker'])]
    else:
        symbols = Symbol.objects.all().order_by('ticker')
    curdir = os.path.dirname(os.path.realpath(__file__))
    for symbol in symbols:
        path = os.path.normpath(os.path.join(
            curdir, os.pardir, os.pardir, os.pardir,
            'data', 'ohlcv', '%s.csv' % symbol.ticker))
        print('Importing %s' % path)
        try:
            copier = CopyMapping(
                SymbolQuote,                       # the target model
                path,                              # per-ticker CSV path
                {'date': 'date', 'open': 'open', 'close': 'close',
                 'high': 'high', 'low': 'low', 'volume': 'volume'},
                static_mapping={'symbol_id': symbol.id},
            )
            copier.save()
            self.stdout.write(self.style.SUCCESS(
                'Successfully imported OHLCV rows for %s' % symbol.name))
        except ValueError:
            self.stdout.write(self.style.ERROR(
                'Could not find CSV file for %s in %s' % (symbol.name, path)))
def test_static_mapping_ignore_non_mapped_headers(self):
    """Extra CSV columns are tolerated when ignore_non_mapped_headers is
    set, and the static parent pk applies to every new row."""
    CopyMapping(
        BasicMockObject,
        self.name_path,
        dict(name='NAME', number='NUMBER', dt='DATE'),
    ).save()
    self.assertEqual(BasicMockObject.objects.count(), 3)
    ben_pk = BasicMockObject.objects.get(name='ben').pk
    CopyMapping(
        BasicMockObject,
        self.fk_mapping_path,
        dict(name='NAME', number='NUMBER', dt='DATE'),
        static_mapping={'parent': ben_pk},
        ignore_non_mapped_headers=True,
    ).save()
    self.assertEqual(BasicMockObject.objects.count(), 6)
    pairs = BasicMockObject.objects.order_by('name').values_list(
        'name', 'parent__name')
    self.assertEqual(
        list(pairs),
        [('ben', None), ('ben junior', 'ben'), ('jane', None),
         ('jane junior', 'ben'), ('joe', None), ('joe junior', 'ben')],
    )
def test_limited_fields(self):
    """Constructing with a partial field mapping is accepted."""
    mapping = {'name': 'NAME', 'dt': 'DATE'}
    CopyMapping(LimitedMockObject, self.name_path, mapping)
def handle(self, *args, **options):
    """
    For each of the filing types we support, loops through the
    downloaded CSVs of that type, using the CopyMapping library
    to dump the records into the raw database, preserving all
    fields from the original data.

    Fix: the bare ``except:`` around the load also swallowed
    SystemExit/KeyboardInterrupt; narrowed to ``except Exception``.
    """
    self.header("Load raw FEC filings")
    name_to_model = {
        "F3X": RawF3XFiling,
        "F3P": RawF3PFiling,
        "F24": RawF24Filing,
        "sa": RawContribution,
        "se": RawIndependentExpenditure,
    }
    if options['flush']:
        self.log('Flushing the DB')
        for model in name_to_model.values():
            model.objects.all().delete()
    # Characters to strip from filenames to get filing number
    ignore = ascii_letters + punctuation
    loaded = 0
    for name, model in name_to_model.items():
        data_dir = os.path.join(self.DATA_DIR, name)
        # If, for some reason, we have no filings of this type
        if not os.path.exists(data_dir):
            continue
        # CSV columns map 1:1 onto model fields.
        mapping = {
            field: field
            for field in model._meta.get_all_field_names()
        }
        for file_name in os.listdir(data_dir):
            # Hidden files will mess with us
            if file_name.startswith('.'):
                continue
            # Remove the ignore characters from the file name to give us
            # just the filing number.
            # NOTE(review): str.translate(None, ignore) is the Python 2
            # form; it raises TypeError on Python 3 — confirm interpreter.
            filing_no = int(file_name.translate(None, ignore))
            if model.objects.filter(filing_no=filing_no).exists():
                self.log(
                    '{} CSV for filing {} is already in the DB'.format(
                        name, filing_no
                    )
                )
                continue
            self.log('Loading {} CSV for filing {}'.format(name, filing_no))
            try:
                c = CopyMapping(
                    model,
                    os.path.join(data_dir, file_name),
                    mapping
                )
                c.save(silent=True)
                loaded += 1
            except Exception:
                self.failure('Error on {} CSV for filing {}'.format(name, filing_no))
    self.success("Loaded {} CSVs".format(loaded))
def test_bad_call(self):
    """Calling the constructor with no arguments raises TypeError."""
    with self.assertRaises(TypeError):
        CopyMapping()
def import_report(self, instance, report_id):
    """Import one Galaxy usage report for *instance*.

    Reads <report_base>.json metadata and <report_base>.tar.gz of TSVs,
    extracts recognised members to TMPDIR under random names, bulk-loads
    them into Job/JobParam/MetricNumeric/Dataset via CopyMapping, cleans
    up the extracted files, then updates summary counters on *instance*.
    Returns False if the metadata fails validation, True otherwise.
    """
    report_base = os.path.join(instance.report_dir, report_id)
    logging.info("[%s] Processing %s", instance.id, report_base)
    # first load the metadata
    with open(report_base + '.json', 'r') as handle:
        meta = json.load(handle)
    try:
        validate(meta)
    except Exception as e:
        logging.exception(e)
        return False  # invalid metadata: abort before touching the archive
    # Next we'll extract files.
    extracted_files = []
    data_map = {}  # logical name ('jobs', 'params', ...) -> extracted path
    with tarfile.open(report_base + '.tar.gz', 'r') as tar:
        for member in tar:
            guessed = self.fix_name(member)
            if guessed == 'unknown':
                continue  # unrecognised member: skip it
            # fancy safe name.
            tmpname = uuid.uuid4().hex + '.tsv'
            extracted_to = os.path.join(TMPDIR, tmpname)
            logging.info("[%s] Extracting %s to %s, guessed:%s",
                         instance.id, member.name, extracted_to, guessed)
            # Record where the 'params' file is or the 'metrics' file.
            data_map[guessed] = extracted_to
            # Change the archive member's name in order to ensure that it
            # is extracted to somewhere with a safe name.
            member.name = tmpname
            # Extract into CWD.
            tar.extract(member, TMPDIR)
            # Track where we put it for cleanup later.
            extracted_files.append(extracted_to)
    # Bulk-load each recognised TSV. quote_character="\b" effectively
    # disables quoting; tab-delimited; instance_id is stamped on all rows.
    if 'jobs' in data_map:
        c = CopyMapping(
            Job,
            data_map['jobs'],
            dict(external_job_id='id', tool_id='tool_id',
                 tool_version='tool_version', state='state',
                 create_time='create_time'),
            quote_character="\b",
            static_mapping={
                'instance_id': instance.id,
            },
            delimiter='\t'
        )
        c.save()
    if 'params' in data_map:
        c = CopyMapping(
            JobParam,
            data_map['params'],
            dict(external_job_id='job_id', name='name', value='value'),
            quote_character="\b",
            static_mapping={
                'instance_id': instance.id,
            },
            delimiter='\t'
        )
        c.save()
    if 'metric_num' in data_map:
        c = CopyMapping(
            MetricNumeric,
            data_map['metric_num'],
            dict(external_job_id='job_id', plugin='plugin', name='name',
                 value='value'),
            quote_character="\b",
            static_mapping={
                'instance_id': instance.id,
            },
            delimiter='\t'
        )
        c.save()
    if 'datasets' in data_map:
        c = CopyMapping(
            Dataset,
            data_map['datasets'],
            dict(
                external_job_id='job_id',
                external_dataset_id='dataset_id',
                extension='extension',
                file_size='file_size',
                param_name='param_name',
                file_type='type',
            ),
            quote_character="\b",
            static_mapping={
                'instance_id': instance.id,
            },
            delimiter='\t'
        )
        c.save()
    # Best-effort cleanup of the extracted temp files.
    for f in extracted_files:
        try:
            logging.info("[%s] Cleaning up %s", instance.id, f)
            os.unlink(f)
        except Exception as e:
            logging.exception(e)
    # Update summary counters from the report metadata, if present.
    if 'users' in meta:
        if 'active' in meta['users']:
            instance.users_recent = meta['users']['active']
        if 'total' in meta['users']:
            instance.users_total = meta['users']['total']
    if 'jobs' in meta:
        if 'ok' in meta['jobs']:
            instance.jobs_run = meta['jobs']['ok']
    instance.last_import = report_id
    instance.save()
    return True
def handle(self, *args, **kwargs):
    """Import a delimited data file (local or downloaded from a URL) into
    the given app/model.

    Pipeline: validate app and model; fetch or locate the input file;
    clean each line via the parent-class-specific line cleaner; skip rows
    whose timestamp already exists in the DB or was just written; write the
    survivors to a temporary CSV; bulk-load it with CopyMapping; clean up.
    """
    # Assign kwargs from url to variables
    inputfile = kwargs['inputfile']
    typesource = kwargs['typesource']
    directory = kwargs['directory']
    app = kwargs['app']
    model = kwargs['model']
    # Validate app
    if not apps.is_installed(app):
        logger.error(f' ERROR app {app} not found')
        return
    # Validate model
    try:
        model_class = get_model_cl(app, model)
    except AttributeError as e:
        logger.error(f' ERROR model {model} not found, exception {e}')
        return
    # Check if data source is from a directory or a url and assign
    # input_file to selected option
    if typesource == 'web':
        # Assign url to inputfile
        url = str(inputfile)
        # Check if sizelimit kwarg passed
        size_limit = kwargs['sizelimit']
        if size_limit:
            # Get size of file to download in megabytes
            info_url = requests.head(url)
            size_bytes_url = info_url.headers['Content-Length']
            size_mb_url = int(size_bytes_url) / (1024 * 1024)
            # If file to download is over limit then log error and stop
            # processing
            if size_mb_url > int(size_limit):
                logger.error(
                    f' ERROR {url} is larger than maximum size allowed: {size_limit} MB'
                )
                return
        # logger.info(f' Started importing input URL: {url}')
        req = requests.get(url)
        url_content = req.content
        input_file = Path(f'{directory}/{model}_downloaded.csv')
        csv_file = open(input_file, 'wb')
        csv_file.write(url_content)
        csv_file.close()
    elif typesource == 'directory':
        input_file = Path(inputfile)
        logger.info(f' Started importing input file: {input_file}')
    else:
        logger.error(
            f' ERROR non-valid value entered for "typesource": {typesource}'
        )
        return
    # Get parent class name
    parent_class_name = model_class.__base__.__name__
    # Get line cleaner function
    try:
        line_cleaner = self.get_line_cleaner(parent_class_name)
    except Exception as e:
        logger.error(e)
        return
    # Assign other variables used to write csv_temporary
    csv_temporary = Path(f'{directory}/{model}_temporary.csv')
    input_fields = model_class.input_fields
    database_fields = [
        field.name for field in model_class._meta.fields
        if field.name != 'id'
    ]
    date_format = model_class.date_format
    # Sliding-window state: recently written timestamps (for de-dup) and a
    # buffer of cleaned rows so a row is only flushed once `rows_after`
    # later rows have been seen.
    written_timestamps = []
    rows_before = 24
    rows_after = 0
    rows_buffer = []
    nead_header = []  # collected NEAD header lines for write_nead_config
    # Write data in input_file into csv_temporary with additional computed
    # fields
    try:
        with open(csv_temporary, 'w', newline='') as sink, \
                open(input_file, 'r') as source:
            sink.write(','.join(database_fields) + '\n')
            records_written = 0
            # Skip number of header lines designated in parent class
            # header line count
            for i in range(model_class.header_line_count):
                first_lines = source.readline()
                nead_header.append(first_lines)
                # NOTE(review): next() here advances the file a second
                # time per iteration — confirm this double-skip is intended.
                next(source, None)
            while True:
                line = source.readline()
                if not line:
                    break
                line_array = [
                    v for v in line.strip().split(model_class.delimiter)
                    if len(v) > 0
                ]
                # Skip header lines that start with designated parent
                # class header symbol. For example: the '#' character
                if line.startswith(model_class.header_symbol):
                    nead_header.append(line)
                    continue
                if len(line_array) != len(input_fields):
                    error_msg = f' ERROR: line has {len(line_array)} values, header has {len(input_fields)} columns'
                    logger.error(error_msg)
                    raise ValueError(error_msg)
                row = {
                    input_fields[i]: line_array[i]
                    for i in range(len(line_array))
                }
                # Process row and add new computed fields
                line_clean = line_cleaner(row, date_format)
                # Make timestamp_iso value a UTC timezone aware datetime
                # object
                dt_obj = line_clean['timestamp_iso']
                aware_dt = make_aware(dt_obj)
                # Check if record with identical timestamp already exists
                # in table, otherwise write record to temporary csv file
                # after checking for record with duplicate timestamp
                try:
                    model_class.objects.get(timestamp_iso=aware_dt)
                except model_class.DoesNotExist:
                    if line_clean[
                            'timestamp_iso'] not in written_timestamps:
                        # keep timestamps length small
                        written_timestamps = written_timestamps[
                            (-1) * min(len(written_timestamps), 1000):]
                        written_timestamps += [line_clean['timestamp_iso']]
                        # slide the row buffer window
                        rows_buffer = rows_buffer[
                            (-1) * min(len(rows_buffer),
                                       rows_before + rows_after):] + [line_clean]
                        # check values before and after
                        if len(rows_buffer) > rows_after:
                            sink.write(','.join([
                                "{0}".format(v)
                                for v in rows_buffer[-(1 + rows_after)].values()
                            ]) + '\n')
                            records_written += 1
        # Write nead header configuration file if applicable
        if nead_header:
            header_symbol = model_class.header_symbol
            write_nead_config(app, nead_header, model, parent_class_name,
                              header_symbol)
    except FileNotFoundError as e:
        logger.error(f' ERROR file not found {input_file}, exception {e}')
        return
    # Assign copy_dictionary from database_fields
    copy_dictionary = {
        database_fields[i]: database_fields[i]
        for i in range(0, len(database_fields))
    }
    # Import processed and cleaned data into Postgres database
    c = CopyMapping(
        # Assign model
        model_class,
        # Temporary CSV with input data and computed fields
        csv_temporary,
        # Dictionary mapping the model fields to CSV fields
        copy_dictionary,
    )
    # Then save it.
    c.save()
    # Log import message
    logger.info(
        f' Finished import: {records_written} new records written in {model}'
    )
    # Delete csv_temporary
    os.remove(csv_temporary)
    # If file downloaded from web delete it
    if os.path.isfile(f'{directory}/{model}_downloaded.csv'):
        os.remove(f'{directory}/{model}_downloaded.csv')