def __init__(self, path):
    """Ingest a pipe-delimited contributions file at *path* into the DB.

    Each line is split on '|' and zipped against ``self.keys`` (assumed to
    be defined elsewhere on the class — TODO confirm) to build a record
    dict.  Only records for Long Beach, CA are stored.  Commits happen in
    batches of ``app.BATCH_SIZE`` lines, with a final commit at the end.

    NOTE(review): the flattened source does not show exact statement
    nesting; the placement of ``i += 1`` (per input line) and the trailing
    commit (after the ``with`` block) below is the reconstruction that
    keeps the batch counter and final flush coherent — confirm against the
    original file.
    """
    self.path = path
    i = 1  # 1-based line counter; drives batch-commit cadence
    with open(path) as f:
        for line in f:
            app.log('contrib: {}'.format(i), level=5)
            # Map the raw pipe-separated fields onto the known column names.
            contribution_record = dict(zip(self.keys, [x.strip() for x in line.split('|')]))
            # Only ingest Long Beach, CA contributions.
            if contribution_record['STATE'] == 'CA' and contribution_record['CITY'] == 'LONG BEACH':
                try:
                    this_contrib = Contribution(**contribution_record)
                    this_committee = Committee.get(CMTE_ID=this_contrib.CMTE_ID)
                    this_contrib.committee = this_committee
                    # Flush to the database every BATCH_SIZE input lines.
                    if i % app.BATCH_SIZE == 0:
                        app.log('commit!', level=3)
                        pony.commit()
                # Duplicate rows raise these two pony errors; skipping them
                # makes re-ingestion idempotent (deliberate best-effort).
                except pony.core.CacheIndexError:
                    pass
                except pony.core.TransactionIntegrityError:
                    pass
            i += 1
    # Final flush for any remainder smaller than one batch.
    try:
        pony.commit()
    except pony.core.CacheIndexError:
        pass
    except pony.core.TransactionIntegrityError:
        pass
def ingest(filepath): '''Ingest file into database''' print "Ingesting %s" % filepath rows_in_file = parse_fec_file(filepath) myfile = File.get_or_create(name=filepath) myfile_id = myfile.id with db.transaction(): # TODO: More sane error handling for idx in range(0, len(rows_in_file), 500): # Ingest 500 rows at a time print "Inserting row %d of %s" % (idx, filepath) rows_subset = rows_in_file[idx:idx+500] rows_to_insert = [] for row in rows_subset: unsaved_new_contribution = Contribution(**row_to_dict(row)) import pdb; pdb.set_trace() # If the row isn't already there, insert it if : pass # If the row is there, check for modifications elif: # If it has not been modified, simply add a ContributionHistory object if: # If it has been modified, create a new object and give the new object a contribution history else: pass Contribution.insert_many(rows_subset).execute()
def ingest(filepath):
    '''Ingest file into database.

    For each parsed row: insert it if unseen, otherwise diff it against
    the stored Contribution — unchanged rows just get a ContributionHistory
    entry, changed rows additionally archive the old values in
    ContributionChanges and update the stored row in place.  Finally the
    file itself is registered in the File table.
    '''
    print "Ingesting %s" % filepath
    rows = parse_fec_file(filepath)
    # check history table to see if this file is done
    with db.transaction():
        for idx, row in enumerate(rows):
            print "Checking row %d of %d from %s" % (idx, len(rows), filepath)
            # (cycle, sub_id) is treated as the natural key for a contribution.
            try:
                contribution_in_db = Contribution.get(cycle=row['cycle'], sub_id=row['sub_id'])
            except Contribution.DoesNotExist:
                contribution_in_db = None
            # If the row isn't already there, insert it
            if not contribution_in_db:
                print t.cyan("\tInserting new row %d of %s" % (idx, filepath))
                new_contribution = Contribution.create(**row)
                ContributionHistory.create(contribution=new_contribution.id, date=row['date'], cycle=row['cycle'], sub_id=row['sub_id'])
            # If the row is there, check for modifications
            else:
                # If it has not been modified, simply add a ContributionHistory object
                contribution_in_db_dict = get_dictionary_from_model(contribution_in_db)
                # Compare everything except the bookkeeping fields: 'id' only
                # exists on the stored row, and 'date' differs per file drop.
                if {k:v for k,v in contribution_in_db_dict.iteritems() if k not in ["date", "id"]} == {k:v for k,v in row.iteritems() if k != "date"}:
                    print t.white("\tNo changes found in row %d of %s" % (idx, filepath))
                    ContributionHistory.create(contribution=contribution_in_db.id, date=row['date'], cycle=row['cycle'], sub_id=row['sub_id'])
                # If it has been modified, create a new object and give the new object a contribution history
                else:
                    print t.magenta("\tDetected change in row %d of %s" % (idx, filepath))
                    # Archive the pre-change values, then overwrite the stored
                    # row field-by-field with the new values.
                    ContributionChanges.create(contribution=contribution_in_db.id, **{k:v for k,v in contribution_in_db_dict.iteritems() if k != "id"})
                    for k,v in row.iteritems():
                        if v != getattr(contribution_in_db, k):
                            setattr(contribution_in_db, k, v)
                    contribution_in_db.save()
                    ContributionHistory.create(contribution=contribution_in_db.id, date=row['date'], cycle=row['cycle'],
                                               sub_id=row['sub_id'])
    # Record the file itself so re-runs can be detected.
    myfile, _ = File.get_or_create(
        name = os.path.basename(filepath),
        # NOTE(review): next(re.finditer(...)) yields a match OBJECT, not the
        # matched "YYYY_YYYY" string — likely wants .group(0); confirm what
        # the File.years column actually stores.
        years=next(re.finditer(r'\d{4}_\d{4}', os.path.basename(filepath))),
        sha1 = sha1OfFile(filepath),
        # The containing directory is named like "downloaded_2015_03_01".
        updated = dateparse(os.path.dirname(filepath).split("/")[-1].replace("downloaded_", "").replace("_", "-")).date(),
        ingested = True
    )
def post_contribution(req, slug):
    """Accept a survey contribution submitted from the public form.

    Rejects closed surveys with a 403; otherwise persists a Contribution
    built from the POSTed geometry and questionnaire fields and returns a
    success JSON payload.
    """
    survey = get_object_or_404(Survey, slug=slug)
    if not survey.published:
        return HttpResponseForbidden('Survey is closed')

    answers = {'text': req.POST['inputText']}
    # Don't make a full check, just make a sanity check
    if (survey.questionnaire == Survey.QUESTIONNAIRE_SCHOOL_TRIPS
            and 'inputMeans' in req.POST and 'inputAge' in req.POST):
        for answer_key, post_key in (('means', 'inputMeans'),
                                     ('helmet', 'inputHelmet'),
                                     ('age', 'inputAge'),
                                     ('destination', 'inputDestination')):
            answers[answer_key] = req.POST[post_key]

    # TODO: Error handling
    contribution = Contribution()
    contribution.survey = survey
    contribution.geometry_data = req.POST['inputGeometry']
    contribution.questionnaire_data = json.dumps(answers)
    contribution.ip_address = req.META['REMOTE_ADDR']
    contribution.save()

    time.sleep(1)
    return JsonResponse({'success': True})
def hello():
    '''Render a diff of contributions between two file-drop dates.

    Reads ``before``/``after`` dates from the query string, pairs each
    archived ContributionChanges row on the *before* date with the live
    contribution state on the *after* date, and also lists contributions
    present before but absent after (deletions).  Renders diff.html.

    Improvement: the eight copy-pasted code-to-label translation
    statements are collapsed into one data-driven loop (same fields, same
    ``labels[value] if value else None`` semantics, same order of fields).
    '''
    before = dateparse(request.args.get('before')).date()
    after = dateparse(request.args.get('after')).date()
    ret = []
    for contrib in ContributionChanges.select().where(ContributionChanges.date == before):
        before_contrib_dict = get_dictionary_from_model(contrib)
        after_contrib_dict = get_dictionary_from_model(
            Contribution.get(sub_id=contrib.sub_id).get_on_date(after))
        # Drop bookkeeping keys that would always differ.
        before_contrib_dict.pop('id')
        before_contrib_dict.pop('contribution')
        after_contrib_dict.pop('id')
        # Translate raw FEC codes to human-readable labels on both sides.
        # Preserves original behavior: falsy codes map to None, unknown
        # non-empty codes raise KeyError.
        for field, labels in (('transaction_pgi', transaction_pgi_dict),
                              ('ammendment_id', ammendment_id_dict),
                              ('transaction_tp', transaction_tp_dict),
                              ('report_type', report_type_dict)):
            for side in (after_contrib_dict, before_contrib_dict):
                side[field] = labels[side[field]] if side[field] else None
        ret.append({
            "before": before_contrib_dict,
            "after": after_contrib_dict,
            # Names of the fields that actually changed, bookkeeping excluded.
            "changes": list(set([x[1][0][0] for x in diff(before_contrib_dict, after_contrib_dict).diffs
                                 if x[0] not in ["equal", "context_end_container"]
                                 and x[1][0][0] not in ['contribution', 'date', 'id']])),
        })
    # Contributions seen on the before date but missing on the after date.
    before_sub_ids = set([x.sub_id for x in
                          ContributionHistory.select().where(ContributionHistory.date == before)])
    after_sub_ids = set([x.sub_id for x in
                         ContributionHistory.select().where(ContributionHistory.date == after)])
    for sub_id in (before_sub_ids - after_sub_ids):
        ret.append({
            "before": get_dictionary_from_model(Contribution.get(sub_id=sub_id)),
            "after": None,
            "changes": None
        })
    return render_template('diff.html', ret=ret, before=before, after=after)
def seed_from(filepath): '''Ingest file into sqlite database''' print "Ingesting %s" % filepath rows = parse_fec_file(filepath) with db.transaction(): myfile, _ = File.get_or_create( name = os.path.basename(filepath), years=next(re.finditer(r'\d{4}_\d{4}', os.path.basename(filepath))), sha1 = sha1OfFile(filepath), updated = dateparse(os.path.dirname(filepath).split("/")[-1].replace("downloaded_", "").replace("_", "-")).date(), ingested = False ) for idx in range(0, len(rows), 500): print "Inserting row %d of %d from %s" % (idx, len(rows), filepath) rows_subset = rows[idx:idx+500] Contribution.insert_many(rows_subset).execute() myfile.update(ingested=True)