def ingest(filepath):
    '''Ingest one parsed FEC file into the database.

    Each row returned by ``parse_fec_file(filepath)`` is looked up in
    ``Contribution`` by ``(cycle, sub_id)`` and handled as follows:

    * not found -- a new ``Contribution`` is created from the row;
    * found and equal to the row (ignoring ``date`` and ``id``) -- the
      stored contribution is left untouched;
    * found but different -- the stored values are copied into a
      ``ContributionChanges`` record, then the ``Contribution`` is
      updated from the row and saved.

    In every case a ``ContributionHistory`` entry is created for the row.
    After all rows are processed, a ``File`` record describing
    ``filepath`` is fetched or created with ``ingested=True``.

    Args:
        filepath: Path of the file to ingest. Its base name must contain
            a ``YYYY_YYYY`` span (four digits, underscore, four digits),
            and the name of its parent directory, with ``downloaded_``
            removed and underscores turned into dashes, is handed to
            ``dateparse`` to obtain the ``updated`` date.

    Raises:
        StopIteration: if the base name contains no ``YYYY_YYYY`` span.
    '''
    # Single-argument print(...) produces the same output under the
    # Python 2 print statement and the Python 3 print function.
    print("Ingesting %s" % filepath)
    rows = parse_fec_file(filepath)
    total = len(rows)

    # TODO: check history table to see if this file is done
    with db.transaction():
        for idx, row in enumerate(rows):
            print("Checking row %d of %d from %s" % (idx, total, filepath))
            try:
                contribution = Contribution.get(cycle=row['cycle'],
                                                sub_id=row['sub_id'])
            except Contribution.DoesNotExist:
                contribution = None

            if not contribution:
                # The row isn't already there: insert it.
                print(t.cyan("\tInserting new row %d of %s" % (idx, filepath)))
                contribution = Contribution.create(**row)
            else:
                # The row is there: compare everything except the
                # bookkeeping fields to detect modifications.
                stored = get_dictionary_from_model(contribution)
                stored_fields = {k: v for k, v in stored.iteritems()
                                 if k not in ("date", "id")}
                row_fields = {k: v for k, v in row.iteritems()
                              if k != "date"}

                if stored_fields == row_fields:
                    print(t.white("\tNo changes found in row %d of %s" % (idx, filepath)))
                else:
                    print(t.magenta("\tDetected change in row %d of %s" % (idx, filepath)))
                    # Snapshot the stored values before overwriting them.
                    ContributionChanges.create(
                        contribution=contribution.id,
                        **{k: v for k, v in stored.iteritems() if k != "id"}
                    )
                    for k, v in row.iteritems():
                        if v != getattr(contribution, k):
                            setattr(contribution, k, v)
                    contribution.save()

            # Every processed row gets a history entry pointing at the
            # contribution that was inserted, left unchanged or updated.
            ContributionHistory.create(contribution=contribution.id,
                                       date=row['date'],
                                       cycle=row['cycle'],
                                       sub_id=row['sub_id'])

        # Created inside the same `with` block as the row changes so that
        # the file is only marked as ingested together with its rows.
        basename = os.path.basename(filepath)
        download_dir = os.path.dirname(filepath).split("/")[-1]
        download_date = download_dir.replace("downloaded_", "").replace("_", "-")
        File.get_or_create(
            name=basename,
            # .group(0) stores the matched "YYYY_YYYY" text rather than
            # the regex match object itself.
            years=next(re.finditer(r'\d{4}_\d{4}', basename)).group(0),
            sha1=sha1OfFile(filepath),
            updated=dateparse(download_date).date(),
            ingested=True,
        )