コード例 #1
0
def ingest(filepath):
    """Ingest the rows of ``filepath`` into the database.

    Each row returned by ``parse_fec_file(filepath)`` is looked up in
    ``Contribution`` by ``(cycle, sub_id)``:

    * no match: the row is inserted as a new ``Contribution``;
    * match whose fields equal the row (ignoring ``date`` and ``id``):
      nothing is rewritten;
    * match whose fields differ: the stored values are copied into
      ``ContributionChanges`` and then overwritten with the row's values.

    In all three cases a ``ContributionHistory`` entry is created for the
    row.  Finally a ``File`` record describing ``filepath`` is fetched or
    created with ``ingested=True``.  All database work happens inside one
    ``db.transaction()`` block.

    Args:
        filepath: Path of the file to ingest.  Its base name must contain a
            ``YYYY_YYYY`` year range; the name of its parent directory is
            parsed as the download date after removing ``downloaded_`` and
            replacing underscores with dashes.

    Raises:
        ValueError: If the base name of ``filepath`` contains no
            ``YYYY_YYYY`` year range.
    """
    print("Ingesting %s" % filepath)

    # Derive the path-based File fields first, so a badly named file fails
    # before any parsing or database work is done.
    filename = os.path.basename(filepath)
    years_match = re.search(r'\d{4}_\d{4}', filename)
    if years_match is None:
        raise ValueError("No YYYY_YYYY year range in file name %r" % filename)
    download_dir = os.path.dirname(filepath).split("/")[-1]
    updated = dateparse(download_dir.replace("downloaded_", "").replace("_", "-")).date()

    rows = parse_fec_file(filepath)
    total_rows = len(rows)
    # TODO: check history table to see if this file is done

    with db.transaction():
        for idx, row in enumerate(rows):
            print("Checking row %d of %d from %s" % (idx, total_rows, filepath))

            try:
                contribution = Contribution.get(cycle=row['cycle'], sub_id=row['sub_id'])
            except Contribution.DoesNotExist:
                contribution = None

            if contribution is None:
                # The row isn't already there: insert it
                print(t.cyan("\tInserting new row %d of %s" % (idx, filepath)))
                contribution = Contribution.create(**row)
            else:
                # The row is there: compare it with what is stored, ignoring
                # the date and the database id
                stored = get_dictionary_from_model(contribution)
                stored_fields = {k: v for k, v in stored.items() if k not in ("date", "id")}
                row_fields = {k: v for k, v in row.items() if k != "date"}

                if stored_fields == row_fields:
                    print(t.white("\tNo changes found in row %d of %s" % (idx, filepath)))
                else:
                    print(t.magenta("\tDetected change in row %d of %s" % (idx, filepath)))

                    # Keep a copy of the stored values before overwriting them
                    ContributionChanges.create(
                        contribution=contribution.id,
                        **{k: v for k, v in stored.items() if k != "id"}
                    )

                    for field, value in row.items():
                        if value != getattr(contribution, field):
                            setattr(contribution, field, value)

                    contribution.save()

            # New, unchanged and modified rows all get a history entry
            ContributionHistory.create(
                contribution=contribution.id,
                date=row['date'],
                cycle=row['cycle'],
                sub_id=row['sub_id'],
            )

        File.get_or_create(
            name=filename,
            # Store the matched text, not the match object itself
            years=years_match.group(0),
            sha1=sha1OfFile(filepath),
            updated=updated,
            ingested=True,
        )