def flush_queue_to_database(self):
    """Flush the in-memory ETL queues to disk as JSON-lines files, bulk-load
    them into the ``etl_data`` staging table via COPY, and run the matching
    insert routine for each queue.

    For each queue (pull requests, review decisions, labels):
      * skip it when empty;
      * flatten each item and sanitize free-text string fields (strip
        non-word characters and double quotes) so the COPY-loaded JSON
        stays well-formed;
      * TRUNCATE ``etl_data``, COPY the file in, and call the insert
        function that moves rows from staging into the real tables.
    The in-memory queues are cleared afterwards.
    """
    queues = [
        ('pull_request_data.json', self.pull_request_data, self.insert_pull_requests),
        ('review_decisions_data.json', self.review_decisions_data, self.insert_comments_and_reviews),
        ('labels_data.json', self.labels_data, self.insert_labels),
    ]
    # Identifier-like keys whose values must pass through unsanitized.
    preserved_keys = ('author_login', 'id', 'pull_request_id', 'name')
    for file_name, data_list, data_insert_function in queues:
        if not data_list:
            continue
        json_path = os.path.join(self.json_data_directory, file_name)
        with open(json_path, 'w') as json_file:
            for item in data_list:
                item = flatten_json(item)
                for key in item.keys():
                    # NOTE: the original code had an unreachable debug branch
                    # (`if key == 'id': print(...)`) inside this condition --
                    # 'id' is excluded by `preserved_keys`, so it was dead
                    # code and has been removed.
                    if isinstance(item[key], str) and key not in preserved_keys:
                        input_string = item[key]
                        item[key] = ' '.join(
                            re.sub(r'\W+', '', s) for s in input_string.split()
                        ).replace('"', '')
                string = json.dumps(item, ensure_ascii=True, separators=(',', ':'), default=str) + '\n'
                json_file.write(string)
        with session_scope() as db_session:
            db_session.execute('TRUNCATE etl_data;')
            with open(json_path, 'rb') as fp:
                postgres_copy.copy_from(fp, ETLData, db_session.connection(), ['data'])
        data_insert_function()
    # Reset the queues now that everything has been persisted.
    self.pull_request_data = []
    self.review_decisions_data = []
    self.labels_data = []
def insert(cls, pull_request_id: str, diff: str):
    """Store *diff* for the pull request unless an identical diff (by
    SHA-256 of its UTF-8 bytes) is already recorded for it.

    When a new diff is inserted, every previously stored diff for the same
    pull request is demoted from most-recent.
    """
    digest = hashlib.sha256(diff.encode('utf-8')).hexdigest()
    with session_scope() as session:
        same_diff = session.query(Diffs).filter(
            and_(Diffs.pull_request_id == pull_request_id,
                 Diffs.diff_hash == digest))
        try:
            same_diff.one()
        except NoResultFound:
            new_diff = Diffs()
            new_diff.pull_request_id = pull_request_id
            new_diff.diff_hash = digest
            new_diff.diff = diff
            # Parse the unified diff to cache line/file statistics.
            patch = PatchSet(diff)
            new_diff.added_lines = patch.added
            new_diff.removed_lines = patch.removed
            new_diff.added_files = len(patch.added_files)
            new_diff.modified_files = len(patch.modified_files)
            new_diff.removed_files = len(patch.removed_files)
            session.add(new_diff)
            # Any other stored diff for this PR is no longer the current one.
            session.query(Diffs).filter(
                and_(Diffs.diff_hash != digest,
                     Diffs.pull_request_id == pull_request_id)
            ).update({Diffs.is_most_recent: False})
def upsert(pull_request_id: str, data: dict):
    """Create or update a label row and ensure the label<->PR association
    exists in the many-to-many table."""
    label_id = data['id']
    with session_scope() as session:
        # Upsert the label itself, keyed on its id.
        try:
            label = session.query(Labels).filter(Labels.id == label_id).one()
        except NoResultFound:
            label = Labels()
            label.pull_request_id = pull_request_id
            session.add(label)
        for field, value in data.items():
            setattr(label, field, value)
        # Ensure the association row exists; only its presence matters.
        try:
            session.query(PullRequestsLabels).filter(
                and_(PullRequestsLabels.label_id == label_id,
                     PullRequestsLabels.pull_request_id == pull_request_id)
            ).one()
        except NoResultFound:
            link = PullRequestsLabels()
            link.pull_request_id = pull_request_id
            link.label_id = label_id
            session.add(link)
def process_invoice_data(invoice_data):
    """Refresh the status and raw payload of an existing invoice row.

    Raises NoResultFound if the invoice id is unknown.
    """
    log.debug('process_invoice_data', invoice_data=invoice_data)
    # NOTE(review): echo=True looks like leftover SQL debug logging -- confirm
    # it is intentional before removing.
    with session_scope(echo=True) as db_session:
        invoice = (
            db_session.query(Invoices)
            .filter(Invoices.id == invoice_data['id'])
            .one()
        )
        invoice.status = invoice_data['status']
        invoice.data = invoice_data
def generate_invoice(self, bounty_id: str, recipient_user_id: str):
    """Create a BTCPay invoice paying *bounty_id* to *recipient_user_id*.

    Redirects to the invoice URL on success; flashes a message and
    redirects back to an admin view when the recipient has no BTCPay
    client configured or the BTCPay request fails.
    """
    with session_scope() as db_session:
        bounty: Bounties = db_session.query(Bounties).filter(Bounties.id == bounty_id).one()
        recipient: Users = db_session.query(Users).filter(Users.id == recipient_user_id).one()
        # Guard: without a BTCPay client we cannot generate an invoice.
        if recipient.btcpay_client is None:
            flash(f'{recipient.best_name} does not have BTCPay configured here. Elsewhere they may have other ways '
                  f'of receiving payments (Patreon, static address, etc).')
            return redirect(url_for('bounties-payable.index_view'))
        try:
            btcpay = RecipientBTCPay(client=recipient.btcpay_client)
            invoice_data = btcpay.get_pull_request_invoice(
                amount=bounty.amount,
                bounty_id=bounty_id,
                pull_request_number=bounty.pull_request.number,
            )
            invoice = Invoices()
            invoice.bounty_id = bounty.id
            invoice.id = invoice_data['id']
            invoice.status = invoice_data['status']
            invoice.url = invoice_data['url']
            invoice.recipient_user_id = recipient_user_id
            invoice.payer_user_id = bounty.payer_user_id
            db_session.add(invoice)
            return redirect(invoice.url)
        except RequestException as e:
            log.debug('RequestException', exception=e, request=e.request, response=e.response)
            # e.response may be None; attribute access then raises
            # AttributeError and we fall back to a generic message.
            try:
                resp: Response = e.response
                flash(f'{resp.status_code} - {resp.text}', category='error')
            except AttributeError:
                flash('Request error')
            return redirect(url_for('users.index_view'))
def get_review_count(pull_request_id: str, pull_request_author_id) -> int:
    """Count comments on a PR that carry a review decision and were not
    written by the PR's own author."""
    with session_scope() as session:
        counting = session.query(func.count(Comments.id)).filter(and_(
            Comments.pull_request_id == pull_request_id,
            Comments.review_decision != ReviewDecision.NONE,
            Comments.author_id != pull_request_author_id,
        ))
        return counting.scalar()
def insert_labels():
    """Move staged label rows from ``etl_data`` into the real tables.

    Executes two SQL statements in one call:
      1. upsert into ``labels`` keyed on id (name/color refreshed on
         conflict);
      2. insert ``pull_requests_labels`` association rows that do not yet
         exist (anti-join: the LEFT OUTER JOIN leaves ``id`` NULL for
         missing associations).
    """
    with session_scope() as db_session:
        db_session.execute(
            """
            WITH etl_data AS (
                SELECT DISTINCT
                    etl_data.data ->> 'id' AS id,
                    etl_data.data ->> 'name' AS "name",
                    etl_data.data ->> 'color' AS color
                FROM etl_data
            )
            INSERT INTO labels (id, "name", color)
            SELECT id, name, color
            FROM etl_data
            ON CONFLICT (id) DO UPDATE SET
                name = excluded.name,
                color = excluded.color;

            WITH etl_data AS (
                SELECT DISTINCT
                    etl_data.data ->> 'id' AS label_id,
                    etl_data.data ->> 'pull_request_id' AS pull_request_id
                FROM etl_data
                LEFT OUTER JOIN pull_requests_labels
                    ON etl_data.data ->> 'id' = pull_requests_labels.label_id
                    AND etl_data.data ->> 'pull_request_id' = pull_requests_labels.pull_request_id
                WHERE pull_requests_labels.id IS NULL
            )
            INSERT INTO pull_requests_labels (label_id, pull_request_id)
            SELECT label_id, pull_request_id
            FROM etl_data;
            """
        )
def delete(pull_request_id: str):
    """Remove every label association for the given pull request."""
    with session_scope() as session:
        associations = session.query(PullRequestsLabels).filter(
            PullRequestsLabels.pull_request_id == pull_request_id)
        associations.delete()
def update_review_count_cache(pull_request_id: str, pull_request_author_id: str):
    """Recompute a PR's review-decision count and cache it on the PR row."""
    count = CommentsData.get_review_count(
        pull_request_id=pull_request_id,
        pull_request_author_id=pull_request_author_id,
    )
    with session_scope() as session:
        pull_request = session.query(PullRequests).filter(
            PullRequests.id == pull_request_id).one()
        pull_request.review_decisions_count = count
def is_polling(self) -> bool:
    """Return True when an active (not yet stopped) polling record exists
    for this service.

    Keeps ``.one()`` so duplicate active records still raise
    MultipleResultsFound instead of being silently treated as polling.
    """
    with session_scope() as session:
        try:
            # Only existence matters -- the row itself was previously bound
            # to an unused local, now removed.
            (
                session.query(ServicePolling)
                .filter(ServicePolling.service == self.service)
                .filter(ServicePolling.stopped_at.is_(None))
                .one()
            )
            return True
        except NoResultFound:
            return False
def stop(self):
    """Mark the active polling record for this service as stopped now.

    Silently does nothing when no active record exists.
    """
    with session_scope() as session:
        active = (
            session.query(ServicePolling)
            .filter(ServicePolling.service == self.service)
            .filter(ServicePolling.stopped_at.is_(None))
        )
        try:
            record = active.one()
        except NoResultFound:
            return
        record.stopped_at = datetime.utcnow()
def on_model_change(self, form, model: Bounties, is_created: bool):
    """Populate ids/timestamps on a bounty being saved and refresh the
    pull request's cached total bounty amount."""
    model.id = uuid4().hex
    model.published_at = datetime.utcnow()
    model.payer_user_id = current_user.id
    model.recipient_user_id = model.pull_request.author_id
    with session_scope() as session:
        # Sum of all existing bounties on this PR (0 when there are none).
        row = session.query(
            coalesce(func.sum(Bounties.amount), 0)
        ).filter(Bounties.pull_request_id == model.pull_request.id).one()
        total_bounty_amount = row[0]
        log.debug('total_satoshis', total_bounty_amount=total_bounty_amount)
        model.pull_request.total_bounty_amount = total_bounty_amount + model.amount
def upsert(self, data: dict):
    """Insert or update a pull request identified by (repository, number)."""
    with session_scope() as session:
        try:
            pr = session.query(PullRequests).filter(and_(
                PullRequests.repository_id == self.repo.id,
                PullRequests.number == data['number'],
            )).one()
        except NoResultFound:
            pr = PullRequests()
            pr.repository_id = self.repo.id
            pr.number = data['number']
            session.add(pr)
        for field, value in data.items():
            setattr(pr, field, value)
def __init__(self, repository_path: str, repository_name: str):
    """Look up (or create) the repository row and keep a detached copy on
    ``self.repo`` for use outside any session."""
    super(RepositoriesData, self).__init__()
    with session_scope() as session:
        try:
            repo = session.query(Repositories).filter(and_(
                Repositories.path == repository_path,
                Repositories.name == repository_name,
            )).one()
        except NoResultFound:
            repo = Repositories()
            repo.path = repository_path
            repo.name = repository_name
            session.add(repo)
        # Flush so a freshly created row gets its state persisted, then
        # detach it so attribute access keeps working after the session ends.
        session.flush()
        session.expunge(repo)
        self.repo = repo
def update(self):
    """Run an incremental update starting from the newest ``updated_at``
    in the pull_requests table, or 2009-01-01 when the table is empty."""
    with session_scope() as session:
        try:
            newest = (
                session
                .query(PullRequests.updated_at)
                .order_by(PullRequests.updated_at.desc())
                .limit(1)
                .one()
            )
            from_date = newest.updated_at
        except NoResultFound:
            # Empty table: start well before GitHub pull requests existed.
            from_date = datetime(2009, 1, 1)
    log.debug('Updating PRs starting from', from_date=from_date)
    self.update_all(newer_than=from_date)
def update(self, last_event: bool = False, last_open_update: bool = False,
           last_full_update: bool = False):
    """Touch the requested polling timestamps for this repository,
    creating the polling row on first use."""
    with session_scope() as session:
        try:
            polling = session.query(Polling).filter(
                Polling.repository_id == self.repo.id).one()
        except NoResultFound:
            polling = Polling()
            polling.repository_id = self.repo.id
            session.add(polling)
        # Each flag stamps its own field with the current UTC time.
        if last_event:
            polling.last_event = datetime.utcnow()
        if last_open_update:
            polling.last_open_update = datetime.utcnow()
        if last_full_update:
            polling.last_full_update = datetime.utcnow()
def upsert(self, data: dict) -> str:
    """Insert or update a user record and return its id.

    Looks up by login first; when the login is unknown locally, fetches
    the canonical record (including id) from GitHub, retries by id, and
    creates a new row only if that also misses.
    """
    with session_scope() as session:
        try:
            user = session.query(Users).filter(
                Users.login == data['login']).one()
        except NoResultFound:
            # Login not in the db: query GitHub to get the user's id.
            data = self.get(login=data['login'])
            try:
                user = session.query(Users).filter(
                    Users.id == data['id']).one()
            except NoResultFound:
                user = Users()
                user.id = data['id']
                session.add(user)
        for field, value in data.items():
            setattr(user, field, value)
        # NOTE(review): explicit commit inside session_scope -- presumably
        # ensures the row is persisted before the id is returned; confirm
        # session_scope does not already commit on exit.
        session.commit()
        return user.id
def upsert(self, pull_request_id: str, data: dict) -> bool:
    """Insert or update a comment row.

    Returns True when the comment body was auto-detected to carry a
    review decision (anything other than ReviewDecision.NONE).
    """
    review_decision = self.identify_review_decision(data['body'])
    author = data.pop('author')
    author_id = UsersData().upsert(data=author)
    with session_scope() as session:
        try:
            comment = session.query(Comments).filter(
                Comments.id == data['id']).one()
        except NoResultFound:
            comment = Comments()
            comment.pull_request_id = pull_request_id
            comment.author_id = author_id
            session.add(comment)
        for field, value in data.items():
            setattr(comment, field, value)
        comment.auto_detected_review_decision = review_decision
    return review_decision != ReviewDecision.NONE
def insert_comments_and_reviews(self):
    """Load staged review comments from ``etl_data`` into ``comments``.

    Three steps, each in its own session:
      1. find author logins present in the staged data but missing from
         ``users``, and fetch/upsert them from GitHub;
      2. upsert comments, resolving ``author_id`` by joining on login;
      3. refresh ``pull_requests.review_decisions_count`` for every PR
         touched by the staged comments.
    """
    with session_scope() as db_session:
        # Logins referenced by staged comments that have no users row yet.
        missing_authors = db_session.execute(
            """
            SELECT DISTINCT etl_data.data ->> 'author_login'
            FROM etl_data
            LEFT OUTER JOIN users ON etl_data.data ->> 'author_login' = users.login
            WHERE users.id IS NULL;
            """
        ).fetchall()
    if missing_authors:
        log.debug('missing_authors', missing_authors=missing_authors, count=len(missing_authors))
        for author in missing_authors:
            login = author[0]
            if login is None:
                # Staged comment without an author login -- nothing to fetch.
                continue
            user_data = self.users_data.get(login)
            self.users_data.upsert(user_data)
    with session_scope() as db_session:
        # Upsert comments; author_id is resolved via the join on login, and
        # the review decision enum is parsed out of its 'Enum.VALUE' form.
        db_session.execute(
            """
            WITH etl_data AS (
                SELECT DISTINCT
                    etl_data.data ->> 'id' AS id,
                    etl_data.data ->> 'bodyText' AS body,
                    (etl_data.data ->> 'publishedAt')::timestamp with time zone AS published_at,
                    etl_data.data ->> 'url' AS url,
                    etl_data.data ->> 'pull_request_id' AS pull_request_id,
                    users.id AS author_id,
                    split_part(etl_data.data ->> 'review_decision', '.', 2)::reviewdecision AS auto_detected_review_decision
                FROM etl_data
                LEFT OUTER JOIN users ON etl_data.data ->> 'author_login' = users.login
            )
            INSERT INTO comments (id, body, published_at, url, pull_request_id, author_id, auto_detected_review_decision)
            SELECT *
            FROM etl_data
            ON CONFLICT (id) DO UPDATE SET
                id = excluded.id,
                body = excluded.body,
                published_at = excluded.published_at,
                url = excluded.url,
                pull_request_id = excluded.pull_request_id,
                author_id = excluded.author_id,
                auto_detected_review_decision = excluded.auto_detected_review_decision
            ;
            """
        )
    with session_scope() as db_session:
        # Recompute the cached review-decision count for each affected PR.
        db_session.execute(
            """
            WITH etl_data AS (
                SELECT DISTINCT etl_data.data ->> 'pull_request_id' AS pull_request_id
                FROM etl_data
            )
            UPDATE pull_requests
            SET review_decisions_count = s.review_decisions_count
            from (SELECT count(comments.id) as review_decisions_count, etl_data.pull_request_id
                  FROM etl_data
                  LEFT JOIN comments on etl_data.pull_request_id = comments.pull_request_id
                      AND comments.auto_detected_review_decision is not null
                      and comments.auto_detected_review_decision != 'NONE'::reviewdecision
                  GROUP BY etl_data.pull_request_id) s
            WHERE s.pull_request_id = pull_requests.id;
            """
        )
polling_data = PollingData('github') if polling_data.is_polling(): log.warn('GitHub is already being polled') sys.exit(0) polling_data.start() if args.pr_number is not None: pull_requests_data.update(number=args.pr_number) elif args.state is not None: args.state = PullRequestState[args.state] pull_requests_data.update_all(state=args.state, limit=args.limit) elif args.high_priority: with session_scope() as session: record = ( session .query(PullRequests.number) .filter( and_(PullRequests.is_high_priority.isnot(None)) ) .all() ) for r in record: pull_requests_data.update(number=int(r.number)) elif args.old: with session_scope() as session: try: record = ( session
def start(self):
    """Record that polling has started for this service."""
    with session_scope() as session:
        session.add(
            ServicePolling(service=self.service, started_at=datetime.utcnow())
        )
def insert_pull_requests(self):
    """Load staged pull requests from ``etl_data`` into ``pull_requests``.

    Two steps, each in its own session:
      1. find author logins present in the staged data but missing from
         ``users``, and fetch/upsert them from GitHub;
      2. upsert pull requests keyed on the table's unique constraint,
         resolving ``author_id`` by joining on login.

    NOTE(review): the CTE's column order must match the INSERT column list
    exactly because the statement uses ``SELECT *``.  Also, the three
    high-priority fields are cast to ``timestamp with time zone`` --
    presumably these columns store when the flag changed rather than a
    boolean; confirm against the model definition.
    """
    with session_scope() as db_session:
        # Logins referenced by staged PRs that have no users row yet.
        missing_authors = db_session.execute(
            """
            SELECT DISTINCT epr.data ->> 'author_login'
            FROM etl_data epr
            LEFT OUTER JOIN users authors ON epr.data ->> 'author_login' = authors.login
            WHERE authors.id IS NULL;
            """
        ).fetchall()
    if missing_authors:
        log.debug('missing_authors', missing_authors=missing_authors, count=len(missing_authors))
        for author in missing_authors:
            login = author[0]
            if login is None:
                # Staged PR without an author login -- nothing to fetch.
                continue
            user_data = self.users_data.get(login)
            self.users_data.upsert(user_data)
    with session_scope() as db_session:
        db_session.execute("""
            WITH etl_data AS (
                SELECT DISTINCT
                    epr.data ->> 'id' AS id,
                    (epr.data ->> 'repository_id')::int AS repository_id,
                    author.id AS author_id,
                    (epr.data ->> 'number')::int AS "number",
                    epr.data ->> 'state' AS "state",
                    epr.data ->> 'title' AS title,
                    (epr.data ->> 'createdAt')::timestamp with time zone AS created_at,
                    (epr.data ->> 'updatedAt')::timestamp with time zone AS updated_at,
                    (epr.data ->> 'is_high_priority')::timestamp with time zone AS is_high_priority,
                    (epr.data ->> 'added_to_high_priority')::timestamp with time zone AS added_to_high_priority,
                    (epr.data ->> 'removed_from_high_priority')::timestamp with time zone AS removed_from_high_priority,
                    (epr.data ->> 'additions')::int AS additions,
                    (epr.data ->> 'deletions')::int AS deletions,
                    epr.data ->> 'mergeable' AS mergeable,
                    epr.data ->> 'last_commit_state' AS last_commit_state,
                    epr.data ->> 'last_commit_state_description' AS last_commit_state_description,
                    epr.data ->> 'last_commit_short_hash' AS last_commit_short_hash,
                    (epr.data ->> 'last_commit_pushed_date')::timestamp with time zone AS last_commit_pushed_date,
                    epr.data ->> 'bodyText' AS body,
                    (epr.data ->> 'mergedAt')::timestamp with time zone AS merged_at,
                    (epr.data ->> 'closedAt')::timestamp with time zone AS closed_at,
                    (epr.data ->> 'commit_count')::int AS commit_count
                FROM etl_data epr
                LEFT OUTER JOIN users author ON epr.data ->> 'author_login' = author.login
            )
            INSERT INTO pull_requests (id, repository_id, author_id, "number", "state", title,
                created_at, updated_at, is_high_priority, added_to_high_priority,
                removed_from_high_priority, additions, deletions, mergeable,
                last_commit_state, last_commit_state_description, last_commit_short_hash,
                last_commit_pushed_date, body, merged_at, closed_at, commit_count)
            SELECT *
            FROM etl_data
            ON CONFLICT ON CONSTRAINT pull_requests_unique_constraint DO UPDATE SET
                repository_id = excluded.repository_id,
                author_id = excluded.author_id,
                "number" = excluded.number,
                "state" = excluded.state,
                title = excluded.title,
                created_at = excluded.created_at,
                updated_at = excluded.updated_at,
                is_high_priority = excluded.is_high_priority,
                added_to_high_priority = excluded.added_to_high_priority,
                removed_from_high_priority = excluded.removed_from_high_priority,
                additions = excluded.additions,
                deletions = excluded.deletions,
                mergeable = excluded.mergeable,
                last_commit_state = excluded.last_commit_state,
                last_commit_state_description = excluded.last_commit_state_description,
                last_commit_short_hash = excluded.last_commit_short_hash,
                last_commit_pushed_date = excluded.last_commit_pushed_date,
                body = excluded.body,
                merged_at = excluded.merged_at,
                closed_at = excluded.closed_at,
                commit_count = excluded.commit_count
            ;""")