コード例 #1
0
ファイル: datagrabber.py プロジェクト: crbikebike/data-rocket
    def __get_api_data__(self, root_key, extra_params=None, filters=None):
        """
        Accepts params for a Harvest endpoint and returns the combined result set of all pages.
        Meant to be portable for all v2 endpoints.

        :param root_key: endpoint name; sent as ``api_url`` and also used as the key that holds
            the record list in every page of the response
        :param extra_params: optional mapping merged into the parameters sent with every request
        :param filters: handed to ``__filter_results__`` as ``filter_list`` for every record
        :return: the JSON dict of the initial request with two changes: ``id_list`` holds the ids
            of the kept records in insertion order, and ``root_key`` holds the filtered, flattened
            and de-duplicated records gathered from all pages
        """
        api_params = {}
        if extra_params:
            api_params.update(extra_params)

        # The initial request supplies the paging metadata; every page, including the first,
        # is then requested explicitly in the loop below.
        api_json_result = self.__get_request__(api_url=root_key, extra_params=api_params)
        total_pages = api_json_result['total_pages']
        total_entries = api_json_result['total_entries']

        api_list = []
        # id_list is part of the returned payload, so it stays a list. The set mirrors it and
        # makes the duplicate check O(1) instead of a scan over every id collected so far.
        id_list = []
        seen_ids = set()
        api_json_result.update(id_list=id_list)

        print('Starting {name} Harvest Pull ({entries} Entries, {pages} Pages)'.format(name=root_key.capitalize(),
                                                                                       entries=total_entries,
                                                                                       pages=total_pages))
        for page_num in range(1, total_pages + 1):
            api_params.update(page=page_num)
            # Request api load for the current page
            page_json_result = self.__get_request__(api_url=root_key, extra_params=api_params)

            for entity in page_json_result[root_key]:
                # Skip records already collected from an earlier page
                if entity['id'] in seen_ids:
                    continue
                entity = self.__filter_results__(results_dict=entity, filter_list=filters)
                # Some results have sub-dictionaries so we want to flatten them
                flat_entity = self.__flatten_results__(entity)
                api_list.append(flat_entity)
                id_list.append(flat_entity['id'])
                seen_ids.add(flat_entity['id'])

            logger.print_progress_bar(iteration=page_json_result['page'], total=total_pages)

        # Replace the endpoint data with our updated info
        api_json_result.update({root_key: api_list})

        return api_json_result
コード例 #2
0
ファイル: datamunger.py プロジェクト: crbikebike/data-rocket
    def munge_task(self):
        """Get all Harvest Tasks and send them to the db.

        Tasks updated since ``self.task_last_updated`` are pulled from Harvest. A task whose id
        already exists in the db is updated with every key of the API record; otherwise a new
        Task is created from its id, name and updated_at. Each record is committed on its own and
        a failure is logged without stopping the run.

        :return:
        None
        """
        # Get the Harvest Tasks List from its API
        harvest_tasks = self.harv.get_harvest_tasks(
            updated_since=self.task_last_updated)
        harvest_tasks_list = harvest_tasks['tasks']
        total_tasks = len(harvest_tasks_list)

        print('Writing Tasks')
        logger.print_progress_bar(iteration=0, total=total_tasks)
        for idx, task in enumerate(harvest_tasks_list, start=1):
            t_id = task['id']
            # Convert the timestamp string into a datetime so the ORM can use it
            task.update(updated_at=datetime.strptime(task['updated_at'],
                                                     datetime_format))

            # If a task is already in the DB, update it.  Otherwise insert it.
            try:
                # Reuse the fetched entity instead of looking the row up a second time
                existing = Task.get(id=t_id)
                if existing:
                    existing.set(**task)
                else:
                    Task(id=t_id,
                         name=task['name'],
                         updated_at=task['updated_at'])
                # Commit the record to the db
                db.commit()
            except Exception as e:
                desc = "Task Entry Error - id: {}".format(t_id)
                logger.write_load_completion(documents=str(e),
                                             description=desc)
            # Update the on-screen progress bar
            logger.print_progress_bar(iteration=idx, total=total_tasks)
コード例 #3
0
ファイル: datamunger.py プロジェクト: crbikebike/data-rocket
    def munge_assignment(self):
        """Converts Forecast API into data warehouse friendly data

        Forecast API is very different than Harvest, so requires quite a bit of munging.
        Takes the date range from the Forecast entry and splits it into individual business day entries
        Calculates the hours/day for each assignment (source shows in seconds)
        Replaces API identity values with data warehouse ones

        Existing Time_Assignment rows that belong to a parent assignment are deleted before the
        new per-day rows are written. Assignments without a person_id only have their old rows
        removed. Insert failures are logged and do not stop the run.
        """
        assignments = self.fore.get_forecast_assignments()
        assignments_list = assignments['assignments']

        # Trim Assignments list by updated date
        assignments_list = self.__trim_forecast_results__(
            f_result_set=assignments_list,
            trim_datetime=self.assn_last_updated)

        # Get stats about the write
        total_parent_assns = len(assignments_list)

        print("Writing Assignments ({} Parent Assignments)".format(
            total_parent_assns))
        logger.print_progress_bar(iteration=0, total=total_parent_assns)
        for idx, assn in enumerate(assignments_list, start=1):
            # Grab information for split entries. The id is popped from the record, so it must
            # be read from parent_id from here on (assn['id'] no longer exists).
            parent_id = assn.pop('id')
            start_date = assn.pop('start_date')
            end_date = assn.pop('end_date')
            updated_at = datetime.strptime(assn.pop('updated_at'),
                                           datetime_format_ms)

            # Convert Allocation to hours from seconds
            allocation = assn.pop('allocation') / 3600

            # Update Assignment Project and Person fk's to match Data Warehouse
            pr = get_project_by_id(assn['project_id'])
            assn.update(project_id=pr.id)

            # Check if record has person id, if it does prepare data for next step
            if assn['person_id']:
                p = get_person_by_id(assn['person_id'])
                assn.update(person_id=p.id)
                # Generate date list between start/end of assignment
                dates = self.__make_date_list__(start=start_date, end=end_date)
            else:
                # Make dates an empty list so it does not write the split assignments to the db
                dates = []

            # Check if assignment records exist already with our parent id, delete if so
            for rec in get_assignments_by_parent(parent_id=parent_id):
                Time_Assignment[rec.id].delete()
            db.commit()

            # For each business day in the assignment, make a new split entry
            for day in dates:
                if not is_busday(day):
                    continue

                # Insert the Time Assignment record
                try:
                    Time_Assignment(parent_id=parent_id,
                                    person_id=assn['person_id'],
                                    project_id=assn['project_id'],
                                    assign_date=day,
                                    allocation=allocation,
                                    updated_at=updated_at)
                    db.commit()
                except Exception as e:
                    desc = "Time Assignment Error - id: {}".format(parent_id)
                    logger.write_load_completion(documents=str(e),
                                                 description=desc)

            # Update the on-screen progress bar
            logger.print_progress_bar(iteration=idx,
                                      total=total_parent_assns)
コード例 #4
0
ファイル: datamunger.py プロジェクト: crbikebike/data-rocket
    def munge_time_entries(self):
        """Send Harvest time entries changed since the last run to the data warehouse.

        Every entry has its date strings parsed, its user keys renamed to person keys, its hours
        rounded to a multiple of 0.25, an ``entry_amount`` computed, and its person, project and
        client ids replaced by the ids of the records returned by the warehouse lookups. The
        entry is then updated in place when its id already exists, or inserted otherwise.
        Afterwards the legacy entries table is truncated and refilled.
        """
        last_updated = self.time_entry_last_updated
        harvest_entries = self.harv.get_harvest_time_entries(
            updated_since=last_updated)
        entries_list = harvest_entries['time_entries']
        # Used by the console progress bar
        entry_count = len(entries_list)
        # Columns written when a brand new Time_Entry is created
        new_entry_fields = (
            'id', 'spent_date', 'hours', 'billable', 'billable_rate',
            'created_at', 'updated_at', 'entry_amount', 'person_id',
            'person_name', 'project_id', 'project_name', 'project_code',
            'client_id', 'client_name', 'task_id', 'task_name',
        )

        print("Writing Time Entries ({} total)".format(entry_count))
        logger.print_progress_bar(iteration=0, total=entry_count)
        for position, entry in enumerate(entries_list, start=1):
            # Parse the date strings so the ORM receives Python objects
            entry['spent_date'] = datetime.strptime(entry['spent_date'],
                                                    date_format)
            entry['created_at'] = datetime.strptime(entry['created_at'],
                                                    datetime_format)
            entry['updated_at'] = datetime.strptime(entry['updated_at'],
                                                    datetime_format)

            # Rename the user keys to the warehouse's person keys
            entry['person_id'] = entry.pop('user_id')
            entry['person_name'] = entry.pop('user_name')

            # Round hours to match the billing preferences used in Harvest
            entry['hours'] = round(entry['hours'] * 4) / 4

            # Total value of the entry; zero when there is no billable rate
            rate = entry['billable_rate']
            entry['entry_amount'] = entry['hours'] * rate if rate else 0

            # Swap person, project and client ids for their warehouse ids
            entry['person_id'] = get_person_by_id(entry['person_id']).id
            entry['project_id'] = get_project_by_id(entry['project_id']).id
            entry['client_id'] = get_client_by_id(entry['client_id']).id

            try:
                # Update the stored entry when there is one, else create it
                stored_entry = Time_Entry.get(id=entry['id'])
                if stored_entry:
                    stored_entry.set(**entry)
                else:
                    Time_Entry(**{field: entry[field]
                                  for field in new_entry_fields})
            except Exception as err:
                description = "Time Entry Error - id: {}".format(entry['id'])
                logger.write_load_completion(documents=str(err),
                                             description=description)

            # Commit entries
            db.commit()
            logger.print_progress_bar(iteration=position, total=entry_count)

        # Trunc legacy entries table and copy time_entry values to it
        print("Copying records to legacy entries table")
        trunc_legacy_entries()
        copy_to_legacy_entries()
コード例 #5
0
ファイル: datamunger.py プロジェクト: crbikebike/data-rocket
    def munge_person(self):
        """Get all Harvest and Forecast people, combine, transform, and push them to db

        Harvest users are written first: a Person with the same harvest_id is updated, otherwise
        a new Person is inserted. Forecast people that were not matched to one of the pulled
        Harvest users are handled afterwards: when they carry a harvest_id only the forecast_id
        of the existing Person is set, otherwise they are updated/inserted by forecast_id.
        Write failures are logged and do not stop the run.

        :return:
        None
        """
        # Get Harvest and Forecast people
        updated_since = self.person_last_updated
        harvest_people = self.harv.get_harvest_users(
            updated_since=updated_since)
        harvest_people_list = harvest_people['users']
        forecast_people = self.fore.get_forecast_people()
        forecast_people_list = forecast_people['people']
        # Trim the Forecast list to the updated_date
        forecast_people_list = self.__trim_forecast_results__(
            f_result_set=forecast_people_list, trim_datetime=updated_since)

        # Update Primary Key, Get Forecast id for each person
        for h_person in harvest_people_list:
            h_person.update(harvest_id=h_person.pop('id'))

            # Convert the datetime strings into python datetime objects so the ORM can use them
            h_person.update(created_at=datetime.strptime(
                h_person['created_at'], datetime_format))
            h_person.update(updated_at=datetime.strptime(
                h_person['updated_at'], datetime_format))

            # Replace the roles key with primary role
            self.__set_primary_role__(h_person)

            # Try to find a Forecast id in the newly pulled list. Matched records are dropped by
            # building a new list: popping from the list while iterating over it would skip the
            # element that follows every match.
            unmatched_forecast_people = []
            for f_person in forecast_people_list:
                if h_person['harvest_id'] == f_person['harvest_id']:
                    h_person.update(forecast_id=f_person['id'])
                else:
                    unmatched_forecast_people.append(f_person)
            forecast_people_list = unmatched_forecast_people

            # Also see if the person has a Forecast ID in the db if the person still doesn't have a Forecast ID
            if 'forecast_id' not in h_person:
                try:
                    p = Person.get(harvest_id=h_person['harvest_id'])
                    h_person.update(forecast_id=p.forecast_id if p else None)
                except Exception as e:
                    # Keep going without a forecast_id, but leave a trace instead of silently
                    # swallowing the failure
                    desc = "Person Forecast Lookup Error - id: {}".format(
                        h_person['harvest_id'])
                    logger.write_load_completion(documents=str(e),
                                                 description=desc)

        # For each Person record, check if in db and then insert/update accordingly
        total_people = len(harvest_people_list)
        print('Writing People:')
        logger.print_progress_bar(iteration=0, total=total_people)
        for idx, person in enumerate(harvest_people_list, start=1):
            harvest_id = person['harvest_id']
            full_name = "{fn} {ln}".format(fn=person['first_name'],
                                           ln=person['last_name'])

            # If a Person is in db update, otherwise insert
            try:
                p = Person.get(harvest_id=harvest_id)
                if p:
                    p.set(**person)
                else:
                    Person(harvest_id=harvest_id,
                           forecast_id=person['forecast_id'],
                           first_name=person['first_name'],
                           last_name=person['last_name'],
                           full_name=full_name,
                           email=person['email'],
                           timezone=person['timezone'],
                           weekly_capacity=person['weekly_capacity'],
                           is_contractor=person['is_contractor'],
                           is_active=person['is_active'],
                           roles=person['roles'],
                           avatar_url=person['avatar_url'],
                           created_at=person['created_at'],
                           updated_at=person['updated_at'])
                # Commit the record
                db.commit()
            except Exception as e:
                desc = "Person Entry Error - id: {}".format(harvest_id)
                logger.write_load_completion(documents=str(e),
                                             description=desc)
            # Update the on-screen progress bar
            logger.print_progress_bar(iteration=idx, total=total_people)

        # Cycle through remaining Forecast people to update forecast_id, if needed
        for f_person in forecast_people_list:
            f_person.update(forecast_id=f_person.pop('id'))
            full_name = "{fn} {ln}".format(fn=f_person['first_name'],
                                           ln=f_person['last_name'])
            # Forecast exposes 'archived'; the Person records store the inverse as is_active
            f_person.update(is_active=not f_person.pop('archived'))
            f_person.update(updated_at=datetime.strptime(
                f_person['updated_at'], datetime_format_ms))

            if f_person['harvest_id']:
                p = Person.get(harvest_id=f_person['harvest_id'])
                if p:
                    p.forecast_id = f_person['forecast_id']
                else:
                    # Person.get found nothing for this harvest_id, so there is no record to
                    # attach the forecast_id to; log it instead of failing on None
                    desc = "Forecast Person Entry Error - id: {}".format(
                        f_person['forecast_id'])
                    logger.write_load_completion(
                        documents="No Person found with harvest_id {}".format(
                            f_person['harvest_id']),
                        description=desc)
            else:
                # If orphan Person exists, update. Else, insert.
                try:
                    fp = Person.get(forecast_id=f_person['forecast_id'])
                    if fp:
                        fp.set(**f_person)
                    else:
                        Person(forecast_id=f_person['forecast_id'],
                               first_name=f_person['first_name'],
                               last_name=f_person['last_name'],
                               full_name=full_name,
                               email=f_person['email'],
                               is_active=f_person['is_active'],
                               updated_at=f_person['updated_at'])
                except Exception as e:
                    desc = "Forecast Person Entry Error - id: {}".format(
                        f_person['forecast_id'])
                    logger.write_load_completion(documents=str(e),
                                                 description=desc)
            # Commit the records
            db.commit()
コード例 #6
0
ファイル: datamunger.py プロジェクト: crbikebike/data-rocket
    def munge_project(self):
        """Pulls Harvest and Forecast projects and inserts/updates records

        Harvest projects are written first: a Project with the same harvest_id is updated,
        otherwise a new Project is inserted. Forecast projects that were not matched to one of
        the pulled Harvest projects are handled afterwards: when they carry a harvest_id only the
        forecast_id of the existing Project is set, otherwise they are updated/inserted by
        forecast_id. Write failures are logged and do not stop the run.
        """
        updated_since = self.project_last_updated
        harvest_projects = self.harv.get_harvest_projects(
            updated_since=updated_since)
        harvest_projects_list = harvest_projects['projects']
        forecast_projects = self.fore.get_forecast_projects()
        forecast_projects_list = forecast_projects['projects']
        # Trim Forecast list based on updated_since var
        forecast_projects_list = self.__trim_forecast_results__(
            f_result_set=forecast_projects_list, trim_datetime=updated_since)

        # Update Primary Key, get Forecast ID for each record
        for h_proj in harvest_projects_list:
            h_proj.update(harvest_id=h_proj.pop('id'))
            h_proj.update(budget=h_proj.pop('cost_budget'))
            h_proj.update(forecast_id=None)
            # Convert the date keys into Python date objects so ORM can use them
            h_proj.update(created_at=datetime.strptime(h_proj['created_at'],
                                                       datetime_format))
            h_proj.update(updated_at=datetime.strptime(h_proj['updated_at'],
                                                       datetime_format))
            if h_proj['starts_on']:
                h_proj.update(starts_on=datetime.strptime(
                    h_proj['starts_on'], date_format))
            if h_proj['ends_on']:
                h_proj.update(
                    ends_on=datetime.strptime(h_proj['ends_on'], date_format))

            # Get Data Warehouse id for Client
            dw_client = get_client_by_id(identifier=h_proj['client_id'])
            h_proj.update(client_id=dw_client.id)

            # Get Forecast id. Matched records are dropped by building a new list: popping from
            # the list while iterating over it would skip the element that follows every match.
            unmatched_forecast_projects = []
            for f_proj in forecast_projects_list:
                if h_proj['harvest_id'] == f_proj['harvest_id']:
                    h_proj.update(forecast_id=f_proj['id'])
                else:
                    unmatched_forecast_projects.append(f_proj)
            forecast_projects_list = unmatched_forecast_projects

        # For each Project record, check if in db and update/insert accordingly
        total_projects = len(harvest_projects_list)
        print('Writing Projects')
        logger.print_progress_bar(iteration=0, total=total_projects)
        for idx, proj in enumerate(harvest_projects_list, start=1):
            harvest_id = proj['harvest_id']

            # If a Project is in db update, otherwise insert
            try:
                pr = Project.get(harvest_id=harvest_id)
                if pr:
                    pr.set(**proj)
                else:
                    Project(
                        harvest_id=harvest_id,
                        forecast_id=proj['forecast_id'],
                        name=proj['name'],
                        code=proj['code'],
                        client_id=proj['client_id'],
                        client_name=proj['client_name'],
                        is_active=proj['is_active'],
                        is_billable=proj['is_billable'],
                        budget=proj['budget'],
                        budget_is_monthly=proj['budget_is_monthly'],
                        created_at=proj['created_at'],
                        updated_at=proj['updated_at'],
                        starts_on=proj['starts_on'],
                        ends_on=proj['ends_on'],
                    )
                db.commit()
            except Exception as e:
                desc = "Project Entry Error - id: {}".format(harvest_id)
                logger.write_load_completion(documents=str(e),
                                             description=desc)
            # Update on-screen progress bar
            logger.print_progress_bar(iteration=idx, total=total_projects)

        # Cycle through remaining Forecast Projects to update records
        for f_proj in forecast_projects_list:
            f_proj.update(forecast_id=f_proj.pop('id'))
            f_proj.update(updated_at=datetime.strptime(f_proj['updated_at'],
                                                       datetime_format_ms))
            if f_proj['starts_on']:
                f_proj.update(starts_on=datetime.strptime(
                    f_proj['starts_on'], date_format))
            if f_proj['ends_on']:
                f_proj.update(
                    ends_on=datetime.strptime(f_proj['ends_on'], date_format))

            # If it has a harvest id, just update the forecast id of the data warehouse project
            if f_proj['harvest_id']:
                pr = Project.get(harvest_id=f_proj['harvest_id'])
                if pr:
                    pr.forecast_id = f_proj['forecast_id']
                else:
                    # Project.get found nothing for this harvest_id, so there is no record to
                    # attach the forecast_id to; log it instead of failing on None
                    desc = "Forecast Project Entry Error - id: {}".format(
                        f_proj['forecast_id'])
                    logger.write_load_completion(
                        documents="No Project found with harvest_id {}".format(
                            f_proj['harvest_id']),
                        description=desc)
            else:
                # If it doesn't have a harvest id, transform data and insert/update data warehouse
                f_proj.update(is_active=not f_proj.pop('archived'))

                # Check for a client ID. If the record has none, set client to RevUnit
                if f_proj['client_id']:
                    dw_client = get_client_by_id(f_proj['client_id'])
                    f_proj.update(client_id=dw_client.id)
                    f_proj.update(client_name=dw_client.name)
                else:
                    # Hard-coded fallback client used for projects without a client
                    f_proj.update(client_id=164)
                    f_proj.update(client_name='RevUnit')

                # Update or insert the orphan Forecast project
                try:
                    fpr = Project.get(forecast_id=f_proj['forecast_id'])
                    if fpr:
                        fpr.set(**f_proj)
                    else:
                        Project(
                            forecast_id=f_proj['forecast_id'],
                            name=f_proj['name'],
                            code=f_proj['code'],
                            client_id=f_proj['client_id'],
                            client_name=f_proj['client_name'],
                            is_active=f_proj['is_active'],
                            updated_at=f_proj['updated_at'],
                            starts_on=f_proj['starts_on'],
                            ends_on=f_proj['ends_on'],
                        )
                except Exception as e:
                    desc = "Forecast Project Entry Error - id: {}".format(
                        f_proj['forecast_id'])
                    logger.write_load_completion(documents=str(e),
                                                 description=desc)
            db.commit()
コード例 #7
0
ファイル: datamunger.py プロジェクト: crbikebike/data-rocket
    def munge_client(self):
        """Pulls Harvest and Forecast Clients and inserts/updates records

        Harvest clients are written first: a Client with the same harvest_id is updated,
        otherwise a new Client is inserted. Forecast clients that were not matched to one of the
        pulled Harvest clients are handled afterwards: when they carry a harvest_id only the
        forecast_id of the existing Client is set, otherwise they are updated/inserted by
        forecast_id. Write failures are logged and do not stop the run.

        :return:
        """
        updated_since = self.client_last_updated
        harvest_clients = self.harv.get_harvest_clients(
            updated_since=updated_since)
        harvest_client_list = harvest_clients['clients']
        forecast_clients = self.fore.get_forecast_clients()
        forecast_clients_list = forecast_clients['clients']
        # Trim Forecast list based on updated_since var
        forecast_clients_list = self.__trim_forecast_results__(
            f_result_set=forecast_clients_list, trim_datetime=updated_since)

        # Update Primary Key, get Forecast ID for each record
        for h_client in harvest_client_list:
            h_client.update(harvest_id=h_client.pop('id'))
            h_client.update(forecast_id=None)
            # Convert the date keys into Python date objects so ORM can use them
            h_client.update(created_at=datetime.strptime(
                h_client['created_at'], datetime_format))
            h_client.update(updated_at=datetime.strptime(
                h_client['updated_at'], datetime_format))

            # Matched records are dropped by building a new list: popping from the list while
            # iterating over it would skip the element that follows every match.
            unmatched_forecast_clients = []
            for f_client in forecast_clients_list:
                if h_client['harvest_id'] == f_client['harvest_id']:
                    h_client.update(forecast_id=f_client['id'])
                else:
                    unmatched_forecast_clients.append(f_client)
            forecast_clients_list = unmatched_forecast_clients

        # For each Client record, check if in db and update/insert accordingly
        total_clients = len(harvest_client_list)
        print('Writing Clients')
        logger.print_progress_bar(iteration=0, total=total_clients)
        for idx, client in enumerate(harvest_client_list, start=1):
            harvest_id = client['harvest_id']

            # If a Client is in db update, otherwise insert
            try:
                c = Client.get(harvest_id=harvest_id)
                if c:
                    c.set(**client)
                else:
                    Client(harvest_id=harvest_id,
                           forecast_id=client['forecast_id'],
                           name=client['name'],
                           is_active=client['is_active'],
                           created_at=client['created_at'],
                           updated_at=client['updated_at'])
                # Commit the record
                db.commit()
            except Exception as e:
                desc = "Client Entry Error - id: {}".format(harvest_id)
                logger.write_load_completion(documents=str(e),
                                             description=desc)

            # Update the on-screen progress bar
            logger.print_progress_bar(iteration=idx, total=total_clients)

        # Cycle through remaining Forecast clients to update forecast_id, if needed
        for f_client in forecast_clients_list:
            # Forecast exposes 'archived'; the Client records store the inverse as is_active
            f_client.update(is_active=not f_client.pop('archived'))
            f_client.update(forecast_id=f_client.pop('id'))
            f_client.update(updated_at=datetime.strptime(
                f_client['updated_at'], datetime_format_ms))

            if f_client['harvest_id']:
                c = Client.get(harvest_id=f_client['harvest_id'])
                if c:
                    c.forecast_id = f_client['forecast_id']
                else:
                    # Client.get found nothing for this harvest_id, so there is no record to
                    # attach the forecast_id to; log it instead of failing on None
                    desc = "Forecast Client Entry Error - id: {}".format(
                        f_client['forecast_id'])
                    logger.write_load_completion(
                        documents="No Client found with harvest_id {}".format(
                            f_client['harvest_id']),
                        description=desc)
            else:
                try:
                    fc = Client.get(forecast_id=f_client['forecast_id'])
                    # Update or insert the orphan Forecast client
                    if fc:
                        fc.set(**f_client)
                    else:
                        Client(forecast_id=f_client['forecast_id'],
                               name=f_client['name'],
                               is_active=f_client['is_active'],
                               updated_at=f_client['updated_at'])
                except Exception as e:
                    # The exception has to be bound here; the handler reads it for the log entry
                    desc = "Forecast Client Entry Error - id: {}".format(
                        f_client['forecast_id'])
                    logger.write_load_completion(documents=str(e),
                                                 description=desc)
            # Commit the record, as the person and project loaders do for their Forecast records
            db.commit()