def clean_pdf(file):
    """Strip the first page of a PDF when it carries ResearchGate markers.

    The upload is opened with ``fitz`` and its first page is searched for
    four marker strings. When at least three of them are found, page 0 is
    deleted and ``file.file`` is replaced by an in-memory buffer holding the
    rewritten document. Documents with one page or fewer are left alone.

    Any exception is reported through ``sentry.log_error``; the read position
    of ``file`` is reset to 0 in every case. Returns ``None``.
    """
    markers = (
        "ResearchGate",
        "Some of the authors of this publication are also working on these related projects",
        "CITATIONS",
        "READS",
    )
    try:
        document = fitz.open(stream=file.read(), filetype="pdf")
        if document.pageCount <= 1:
            return

        leading_page = document[0]
        # Count how many of the marker strings appear on the first page.
        hits = sum(1 for marker in markers if leading_page.searchFor(marker))
        if hits >= 3:
            document.deletePage(0)
            file.file = io.BytesIO(document.write())
    except Exception as e:
        sentry.log_error(e)
    finally:
        # Always rewind so later readers see the stream from the start.
        file.seek(0)
def get_promoted(self, obj):
    """Return ``obj.get_promoted_score()`` unless the context opts out.

    Returns ``None`` when ``exclude_promoted_score`` is truthy in
    ``self.context`` or when computing the score raises (the exception is
    passed to ``log_error``).
    """
    skip_score = self.context.get('exclude_promoted_score', False)
    if skip_score:
        return None

    try:
        score = obj.get_promoted_score()
    except Exception as e:
        log_error(e)
        return None
    return score
def get_user_score(user_id):
    """Fetch the score for ``user_id`` via ``client.score`` and print it as JSON.

    A ``sift.client.ApiException`` is logged to sentry and its
    ``api_error_message`` is printed. Nothing is returned.
    """
    try:
        score_response = client.score(user_id)
        print(json.dumps(score_response.body))
    except sift.client.ApiException as e:
        sentry.log_error(e)
        print(e.api_error_message)
def unlabel_user(user_id):
    """Call ``client.unlabel`` for ``user_id`` with the ``content_abuse`` type.

    The response body is printed. A ``sift.client.ApiException`` is logged to
    sentry and its ``api_error_message`` is printed. Nothing is returned.
    """
    # TODO: Finish this by determining how we plan to use it
    try:
        unlabel_response = client.unlabel(
            user_id,
            abuse_type='content_abuse',
        )
        print(unlabel_response.body)
    except sift.client.ApiException as e:
        sentry.log_error(e)
        print(e.api_error_message)
def attach_file_to_document(sender, instance, created, update_fields, **kwargs):
    """Attach the instance's file via ``pdf_pipeline`` after a file update.

    Does nothing for newly created instances or when
    ``check_file_updated(update_fields)`` is falsy. Otherwise, if
    ``instance.file`` is not ``None``, returns the ``ok`` attribute of the
    ``pdf_pipeline.attach_paper_pdf`` response. Any exception is logged to
    sentry and ``None`` is returned.
    """
    if created or not check_file_updated(update_fields):
        return None

    try:
        if instance.file is None:
            return None
        attach_response = pdf_pipeline.attach_paper_pdf(instance)
        return attach_response.ok
    except Exception as e:
        sentry.log_error(e, 'Failed to attach file to document')
    return None
def pdf_url_to_journal_url(cls, pdf_url):
    """Build a journal URL from ``pdf_url``.

    The URL is split on ``cls.pdf_url_split_on_partial``; the second piece,
    passed through ``cls.remove_query``, is placed between
    ``cls.journal_url_base`` and ``cls.journal_url_suffix``.

    Returns ``None`` (after logging to sentry with ``pdf_url`` as the
    message) if any step raises, e.g. when the split yields no second piece.
    """
    segments = pdf_url.split(cls.pdf_url_split_on_partial)
    try:
        identifier = cls.remove_query(segments[1])
        return f'{cls.journal_url_base}{identifier}{cls.journal_url_suffix}'
    except Exception as e:
        sentry.log_error(e, message=pdf_url)
        return None
def _build_pipeline_if_not_exists(self):
    """Build the pipeline when the existence check returns exactly ``False``.

    Any other result of ``self._check_pipeline_exists()`` (including
    ``None``) leaves the pipeline untouched. Exceptions from either call are
    logged to sentry and swallowed.
    """
    try:
        pipeline_exists = self._check_pipeline_exists()
        if pipeline_exists is False:
            self._build_pipeline()
    except Exception as e:
        sentry.log_error(e, 'Failed to build pipeline')
def pdf_url_to_journal_url(cls, pdf_url):
    """Build a journal URL from ``pdf_url``.

    The URL is split on ``cls.pdf_url_split_on``; the second piece is passed
    through ``cls.remove_query`` and cut at ``cls.pdf_url_suffix``. What
    precedes the suffix is appended to ``cls.journal_url_base``.

    Returns ``None`` (after logging to sentry with ``pdf_url`` as the
    message) if any step raises.
    """
    segments = pdf_url.split(cls.pdf_url_split_on)
    try:
        without_query = cls.remove_query(segments[1])
        identifier = without_query.split(cls.pdf_url_suffix)[0]
        return f'{cls.journal_url_base}{identifier}'
    except Exception as e:
        sentry.log_error(e, message=pdf_url)
        return None
def registry_update_related_task(pk, app_label, model_name):
    """Look up a model instance by ``pk`` and pass it to ``registry.update_related``.

    If ``apps.get_model`` raises ``LookupError`` the error is logged to
    sentry and nothing else happens. Exceptions from fetching the instance
    or from the registry call are not caught here.
    """
    try:
        model_class = apps.get_model(app_label, model_name)
    except LookupError as e:
        sentry.log_error(e)
        return

    related_instance = model_class.objects.get(pk=pk)
    registry.update_related(related_instance)
def journal_url_to_pdf_url(cls, journal_url):
    """Build a PDF URL from ``journal_url``.

    The URL is split on ``cls.journal_url_split_on``; the second piece,
    passed through ``cls.remove_query``, is placed between
    ``cls.pdf_url_base`` and ``cls.pdf_url_suffix``.

    Returns ``None`` (after logging to sentry with ``journal_url`` as the
    message) if any step raises.
    """
    segments = journal_url.split(cls.journal_url_split_on)
    try:
        identifier = cls.remove_query(segments[1])
        return f'{cls.pdf_url_base}{identifier}{cls.pdf_url_suffix}'
    except Exception as e:
        sentry.log_error(e, message=journal_url)
        return None
def _add_citation(self, user, hypothesis_id, unified_document, citation_type):
    """Create a ``Citation`` and link it to the hypothesis with ``hypothesis_id``.

    The citation is created with ``user`` as ``created_by`` and
    ``unified_document`` as ``source``. Any exception (including a failed
    hypothesis lookup) is logged to sentry and swallowed. Returns ``None``.
    """
    try:
        target_hypothesis = Hypothesis.objects.get(id=hypothesis_id)
        new_citation = Citation.objects.create(
            created_by=user,
            source=unified_document,
            citation_type=citation_type,
        )
        new_citation.hypothesis.set([target_hypothesis])
    except Exception as e:
        sentry.log_error(e)
def get_uploaded_by(self, hit):
    """Return serialized data for the user who uploaded the paper in ``hit``.

    The paper is loaded by ``hit["id"]``. Returns ``None`` when the paper has
    no ``uploaded_by`` value, and also (implicitly) when any step raises, in
    which case the exception is passed to ``log_error``.
    """
    try:
        paper = Paper.objects.get(id=hit["id"])
        if not paper.uploaded_by:
            return None
        uploader = User.objects.get(id=paper.uploaded_by.id)
        serialized = UserSerializer(uploader, read_only=True)
        return serialized.data
    except Exception as e:
        log_error(e, "Paper is missing uploaded_by")
def get_author_profile(self, user):
    """Serialize ``user.author_profile`` with ``DynamicAuthorSerializer``.

    Extra serializer keyword arguments are read from the
    ``usr_dus_get_author_profile`` entry of ``self.context``. Returns an
    empty dict (after logging to sentry) if serialization raises.
    """
    context = self.context
    serializer_kwargs = context.get("usr_dus_get_author_profile", {})
    try:
        profile_serializer = DynamicAuthorSerializer(
            user.author_profile,
            context=context,
            **serializer_kwargs,
        )
        return profile_serializer.data
    except Exception as e:
        sentry.log_error(e)
        return {}
def get_latest_submission_date(self, hub):
    """Return ``created_date`` of the newest submitter contribution for ``hub``.

    Contributions are filtered by ``Contribution.SUBMITTER`` and by
    ``hub.id`` among the unified document's hubs. Returns ``None`` (after
    calling ``log_error``) if the lookup raises, e.g. when nothing matches.
    """
    try:
        submissions = Contribution.objects.filter(
            contribution_type=Contribution.SUBMITTER,
            unified_document__hubs=hub.id,
        )
        newest = submissions.latest('created_date')
        return newest.created_date
    except Exception as error:
        log_error(error)
        return None
def __call__(self, execute, sql, params, many, context):
    """Run ``execute`` and capture a traceback once it succeeds.

    The monotonic start time is stored on ``self.start`` before the call.
    If ``execute`` raises, the exception is logged to sentry and re-raised
    without calling ``self.capture_traceback()``.

    Returns whatever ``execute(sql, params, many, context)`` returns.
    """
    self.start = time.monotonic()
    try:
        outcome = execute(sql, params, many, context)
    except Exception as e:
        sentry.log_error(e)
        raise

    self.capture_traceback()
    return outcome
def mailchimp_add_user(request, user, **kwargs):
    """Adds user email to MailChimp

    Subscribes ``user.email`` to the list identified by
    ``MAILCHIMP_LIST_ID``. Failures are logged to sentry and swallowed, so
    this never raises because of the MailChimp call itself.
    """
    mailchimp = Client()
    mailchimp.set_config({
        'api_key': keys.MAILCHIMP_KEY,
        'server': MAILCHIMP_SERVER
    })

    try:
        member_info = {'email_address': user.email, 'status': 'subscribed'}
        mailchimp.lists.add_list_member(MAILCHIMP_LIST_ID, member_info)
    except Exception as error:
        # This handler catches every Exception, but not every exception has a
        # `.text` attribute. Reading it unconditionally would raise
        # AttributeError here and hide the original failure.
        message = getattr(error, 'text', str(error))
        sentry.log_error(error, message=message)
def generate_distribution(self, item, amount=1, distribute=True):
    """Create a ``REWARD`` distribution for the user associated with ``item``.

    A ``Contribution`` is first resolved to the object it points at; if that
    lookup raises, the exception is printed and ``None`` is returned. The
    recipient is then chosen from the item's concrete type.

    Returns the result of ``Distributor.distribute()`` when ``distribute`` is
    truthy, otherwise the undistributed ``Distributor``.

    Raises ``Exception`` (after logging it to sentry) for an unsupported
    item type.
    """
    # Imported inside the function, as in the original (presumably to avoid
    # circular imports between apps -- confirm before moving to module level).
    from paper.models import Paper, Vote
    from user.models import User, Author
    from bullet_point.models import BulletPoint, Vote as BulletPointVote
    from summary.models import Summary, Vote as SummaryVote
    from discussion.models import Thread, Comment, Reply

    item_type = type(item)

    if item_type is Contribution:
        content_type = item.content_type
        try:
            item = content_type.get_object_for_this_type(id=item.object_id)
            item_type = type(item)
        except Exception as e:
            print(e)
            return None

    # Every type whose recipient is simply `item.created_by`.
    created_by_types = (
        BulletPoint,
        BulletPointVote,
        SummaryVote,
        Vote,
        Thread,
        Comment,
        Reply,
    )

    if item_type is Paper:
        recipient = item.uploaded_by
    elif item_type is Summary:
        recipient = item.proposed_by
    elif item_type is User:
        recipient = item
    elif item_type is Author:
        recipient = item.user
    elif item_type in created_by_types:
        recipient = item.created_by
    else:
        error = Exception(f'Missing instance type: {str(item_type)}')
        sentry.log_error(error)
        raise error

    distributor = Distributor(
        dist('REWARD', amount, False),
        recipient,
        item,
        time.time(),
    )
    return distributor.distribute() if distribute else distributor
def apply_bad_content_decision(self, content_creator, content_id, source='AUTOMATED_RULE', reporter=None):
    """Apply the ``content_looks_bad_content_abuse`` decision to a content item.

    The analyst is ``reporter.email`` when a reporter is given, otherwise a
    placeholder address. A ``sift.client.ApiException`` is logged to sentry
    and printed. Nothing is returned.
    """
    if reporter:
        analyst = reporter.email
    else:
        analyst = '*****@*****.**'

    explanation = 'Auto flag of moderator-removed content'
    decision_payload = {
        'decision_id': 'content_looks_bad_content_abuse',
        'source': source,
        'analyst': analyst,
        'description': explanation,
        'reason': explanation,
    }

    try:
        client.apply_content_decision(
            str(content_creator.id),
            content_id,
            decision_payload,
        )
    except sift.client.ApiException as e:
        sentry.log_error(e)
        print(e)
def apply_bad_user_decision(self, content_creator, source='AUTOMATED_RULE', reporter=None):
    """Apply the ``looks_bad_content_abuse`` decision to ``content_creator``.

    The analyst is ``reporter.email`` when a reporter is given, otherwise a
    placeholder address. A ``sift.client.ApiException`` is logged to sentry
    and printed. Nothing is returned.
    """
    if reporter:
        analyst = reporter.email
    else:
        analyst = '*****@*****.**'

    explanation = 'User looks risky for content abuse'
    decision_payload = {
        'decision_id': 'looks_bad_content_abuse',
        'source': source,
        'analyst': analyst,
        'description': explanation,
        'reason': explanation,
    }

    try:
        client.apply_user_decision(str(content_creator.id), decision_payload)
    except sift.client.ApiException as e:
        sentry.log_error(e)
        print(e)
def get_discussion_hubs(instance):
    """Return the ``hubs`` of the paper that ``instance`` belongs to.

    The paper is reached directly for ``BulletPoint`` and ``Thread``, through
    ``parent`` for ``Comment``, and through ``parent.parent`` for ``Reply``.
    For a ``Reply`` any exception while walking the chain is logged to sentry
    and ``None`` is returned. Unsupported types also yield ``None``.
    """
    if isinstance(instance, BulletPoint):
        return instance.paper.hubs

    if isinstance(instance, Comment):
        return instance.parent.paper.hubs

    if isinstance(instance, Reply):
        try:
            return instance.parent.parent.paper.hubs
        except Exception as e:
            sentry.log_error(e)
            return None

    if isinstance(instance, Thread):
        return instance.paper.hubs

    return None
def distribute(self):
    """Record the distribution, apply it, and return the record.

    The record is marked pending, reputation and balance are updated, and
    the record is marked distributed. If any of those steps raises, the
    record is marked failed, a ``ReputationDistributorError`` wrapping the
    cause is logged to sentry, and the failure is printed. The record is
    returned in both cases.
    """
    record = self._record_distribution()
    try:
        record.set_distributed_pending()
        self._update_reputation_and_balance(record)
        record.set_distributed()
    except Exception as e:
        record.set_distributed_failed()
        failure_text = f'Distribution {record.id} failed'
        sentry.log_error(ReputationDistributorError(e, failure_text))
        print(failure_text, e)
    return record
def celery_track_content_comment(
    user_id,
    comment_id,
    comment_type,
    meta,
    is_thread,
    update,
):
    """Send a create/update content event for a discussion comment to ``client``.

    The user and the comment (model ``discussion.<comment_type>``) are loaded
    by id. The event is ``$update_content`` when ``update`` is truthy,
    otherwise ``$create_content``. ``meta`` is accepted but not used.

    Returns the response body on success. A ``sift.client.ApiException`` is
    logged to sentry, its ``api_error_message`` printed, and ``None`` is
    returned implicitly.
    """
    user_model = apps.get_model('user.User')
    user = user_model.objects.get(id=user_id)
    discussion_model = apps.get_model(f'discussion.{comment_type}')
    comment = discussion_model.objects.get(id=comment_id)

    # Root content is the paper the comment hangs off, when there is one.
    if comment.paper is not None:
        root_content_id = (
            f'{type(comment.paper).__name__}_{comment.paper.id}'
        )
    else:
        root_content_id = ''

    comment_properties = {
        # Required fields
        '$user_id': str(user.id),
        # must be unique across all content types
        '$content_id': f'{type(comment).__name__}_{comment.id}',

        # Recommended fields
        '$status': '$active',

        # Required $comment object
        '$comment': {
            '$body': comment.plain_text,
            '$contact_email': user.email,
            '$root_content_id': root_content_id,
        }
    }

    if not is_thread:
        # Non-thread comments also reference their parent.
        parent_ref = f'{type(comment.parent).__name__}_{comment.parent.id}'
        comment_properties['$comment']['$parent_comment_id'] = parent_ref

    if update:
        track_type = '$update_content'
    else:
        track_type = '$create_content'

    try:
        response = client.track(
            track_type,
            comment_properties,
            return_score=False,
        )
        body = response.body
        print(body)
        return body
    except sift.client.ApiException as e:
        sentry.log_error(e)
        print(e.api_error_message)
def label_bad_user(user_id, abuse_type, description=''):
    """Label ``user_id`` as bad via ``client.label`` and print the response body.

    A ``sift.client.ApiException`` is logged to sentry and its
    ``api_error_message`` is printed. Nothing is returned.
    """
    # TODO: Finish this by determining how we plan to use it
    label_properties = {
        '$is_bad': True,
        # optional fields
        '$abuse_type': abuse_type,
        '$description': description,
        '$source': 'django',
        '$analyst': '*****@*****.**'
    }
    try:
        label_response = client.label(user_id, label_properties)
        print(label_response.body)
    except sift.client.ApiException as e:
        sentry.log_error(e)
        print(e.api_error_message)
def after_approval_flow(case_id):
    """Reward the requestor and send the approval email for an approved claim.

    Loads the ``AuthorClaimCase`` with ``case_id`` and does nothing unless
    its status equals ``APPROVED``. Any exception raised while rewarding,
    validating, or emailing is logged to sentry and swallowed.
    """
    instance = AuthorClaimCase.objects.get(id=case_id)
    if instance.status != APPROVED:
        return

    try:
        requestor_author = instance.requestor.author_profile
        reward_author_claim_case(
            requestor_author,
            instance.target_paper,
            instance,
        )
        # NOTE(review): this check runs after the reward call, so the reward
        # is attempted even when target_paper is None -- confirm intended.
        if instance.target_paper is None:
            raise Exception(
                'Cannot approve claim because paper was not found')
        send_approval_email(instance)
    except Exception as exception:
        sentry.log_error(exception)
def trigger_email_validation_flow(case_id):
    """Issue a new validation token for an initiated claim and email it.

    Loads the ``AuthorClaimCase`` with ``case_id`` and does nothing unless
    its status equals ``INITIATED``. The new token and its generation time
    are set on the instance, the validation email is sent, the attempt
    counter is incremented, and the instance is saved. Any exception is
    printed and logged to sentry.
    """
    instance = AuthorClaimCase.objects.get(id=case_id)
    if instance.status != INITIATED:
        return

    try:
        generated_time, token = get_new_validation_token()
        instance.token_generated_time = generated_time
        instance.validation_token = token
        # Note: intentionally sending email before incrementing attempt
        send_validation_email(instance)
        instance.validation_attempt_count += 1
        instance.save()
    except Exception as exception:
        print('exception', exception)
        sentry.log_error(exception)
def track_flag_content(self, user, content_id, referer_id):
    """Send a ``$flag_content`` event for ``content_id`` to ``client``.

    Returns ``None`` immediately when ``user`` is falsy. Otherwise the
    response body is printed. A ``sift.client.ApiException`` is logged to
    sentry and its ``api_error_message`` is printed.
    """
    # https://sift.com/developers/docs/curl/events-api/reserved-events/flag-content
    if not user:
        return None

    flag_event = {
        '$user_id': str(user.id),
        '$content_id': content_id,
        '$flagged_by': str(referer_id),
    }

    try:
        flag_response = client.track('$flag_content', flag_event)
        print(flag_response.body)
    except sift.client.ApiException as e:
        sentry.log_error(e)
        print(e.api_error_message)
def email_notifications(request):
    """Handles AWS SNS email notifications.

    ``request.data`` is used directly when it is a ``dict`` and parsed with
    ``json.loads`` otherwise. The payload's ``Type`` key selects the branch:

    * ``SubscriptionConfirmation``: issue a GET to ``SubscribeURL`` and log a
      request error when the status code is not 200.
    * ``Notification``: parse ``Message`` as JSON; for a ``Bounce``
      notification, mark every bounced recipient as bounced.
    * ``Complaint``: log an info message.
    * anything else: print an "unsupported type" message.

    Raises ``ParseError`` when ``Type`` is missing. Always returns an empty
    ``Response`` otherwise.
    """
    data = request.data
    if type(request.data) is not dict:
        data = json.loads(request.data)

    data_type = None
    try:
        data_type = data['Type']
    except KeyError:
        raise ParseError(f'Did not find key `Type` in {data}')

    if data_type == 'SubscriptionConfirmation':
        url = data['SubscribeURL']
        resp = http_request('GET', url)
        if resp.status_code != 200:
            message = 'Failed to subscribe to SNS'
            log_request_error(resp, message)
    elif data_type == 'Notification':
        data_message = json.loads(data['Message'])
        if data_message['notificationType'] == 'Bounce':
            bounced_recipients = data_message['bounce']['bouncedRecipients']
            for b_r in bounced_recipients:
                email_address = b_r['emailAddress']
                # TODO: Sanitize email address before putting it in the db
                try:
                    recipient, created = EmailRecipient.objects.get_or_create(
                        email=email_address)
                    recipient.bounced()
                except Exception as e:
                    # A failure for one recipient is reported and the loop
                    # continues with the next one.
                    message = (
                        f'Failed handling bounced recipient: {email_address}')
                    error = EmailNotificationError(e, message)
                    print(error)
                    log_error(error, base_error=e)
    # NOTE(review): this compares the top-level `Type`, not the
    # `notificationType` inside `Message` -- confirm which one is meant to
    # carry 'Complaint'.
    elif data_type == 'Complaint':
        message = (f'`email_notifications` received {data_type}')
        log_info(message)
    else:
        message = (
            f'`email_notifications` received unsupported type {data_type}')
        print(message)
    return Response({})
def _write_trace_record(trace_text, **traceback_fields):
    """Persist ``trace_text`` as the ``trace`` file of a new ``Traceback`` row.

    The text is written to a scratch file under ``/tmp/trace_logs`` which is
    handed to ``Traceback.objects.create`` and removed afterwards, even when
    the create call raises.
    """
    # The scratch directory may not exist yet on a fresh host.
    os.makedirs('/tmp/trace_logs', exist_ok=True)
    filename = f'/tmp/trace_logs/{str(datetime.now())}.log'
    try:
        with open(filename, 'wb+') as f:
            f.write(trace_text.encode())
            Traceback.objects.create(trace=File(f), **traceback_fields)
    finally:
        # Clean up on success and on failure so scratch files never pile up.
        if os.path.exists(filename):
            os.remove(filename)


def log_traceback(data, total_view_time, traceback):
    """Store a ``Profile`` for a view call plus its view and SQL tracebacks.

    Args:
        data: Mapping with the keys ``queries``, ``view_name``, ``path``,
            ``http_method``, ``total_queries`` and ``total_time``. Each entry
            of ``queries`` is read with ``.get`` for ``sql``, ``traceback``
            and ``time``.
        total_view_time: Time recorded for the whole view.
        traceback: Text of the view-level traceback.

    A missing key in ``data`` raises ``KeyError``. Any exception raised while
    creating the database rows or scratch files is logged to sentry and
    swallowed. Returns ``None``.
    """
    queries = data['queries']
    view_name = data['view_name']
    path = data['path']
    http_method = data['http_method']
    total_queries = str(data['total_queries'])
    total_sql_time = data['total_time']

    try:
        profile = Profile.objects.create(
            view_name=view_name,
            path=path,
            http_method=http_method,
            total_queries=total_queries,
            total_sql_time=total_sql_time,
            total_view_time=total_view_time
        )

        _write_trace_record(
            traceback,
            profile=profile,
            choice_type=Traceback.VIEW_TRACE,
            time=total_view_time,
        )

        for query in queries:
            _write_trace_record(
                query.get('traceback', ''),
                profile=profile,
                choice_type=Traceback.SQL_TRACE,
                time=query.get('time'),
                sql=query.get('sql'),
            )
    except Exception as e:
        sentry.log_error(e)
def journal_url_to_pdf_url(cls, journal_url):
    """Build a subdomain-specific PDF URL from ``journal_url``.

    The subdomain from ``cls.get_subdomain`` is mapped through ``SUB_KEYS``,
    falling back to the subdomain itself when there is no entry. The second
    piece of the URL split on ``cls.journal_url_split_on``, passed through
    ``cls.remove_query``, is placed between the base from
    ``cls.build_pdf_url_base`` and ``cls.pdf_url_suffix``.

    Returns ``None`` (after logging to sentry with ``journal_url`` as the
    message) if building the URL raises.
    """
    subdomain = cls.get_subdomain(journal_url)
    try:
        sub_key = SUB_KEYS[subdomain]
    except KeyError:
        # No mapping for this subdomain: use it as its own key.
        sub_key = subdomain

    segments = journal_url.split(cls.journal_url_split_on)
    try:
        identifier = cls.remove_query(segments[1])
        base = cls.build_pdf_url_base(subdomain, sub_key)
        return f'{base}{identifier}{cls.pdf_url_suffix}'
    except Exception as e:
        sentry.log_error(e, message=journal_url)
        return None
def pdf_url_to_journal_url(cls, pdf_url):
    """Build a subdomain-specific journal URL from ``pdf_url``.

    The subdomain from ``cls.get_subdomain`` is mapped through ``SUB_KEYS``,
    falling back to the subdomain itself when there is no entry. The URL is
    split on the separator from ``cls.build_pdf_url_split_on``; the second
    piece is passed through ``cls.remove_query`` and cut at
    ``cls.pdf_url_suffix``. What precedes the suffix is appended to the base
    from ``cls.build_journal_url_base``.

    Returns ``None`` (after logging to sentry with ``pdf_url`` as the
    message) if building the URL raises.
    """
    subdomain = cls.get_subdomain(pdf_url)
    try:
        sub_key = SUB_KEYS[subdomain]
    except KeyError:
        # No mapping for this subdomain: use it as its own key.
        sub_key = subdomain

    separator = cls.build_pdf_url_split_on(sub_key)
    segments = pdf_url.split(separator)
    try:
        without_query = cls.remove_query(segments[1])
        identifier = without_query.split(cls.pdf_url_suffix)[0]
        base = cls.build_journal_url_base(subdomain)
        return f'{base}{identifier}'
    except Exception as e:
        sentry.log_error(e, message=pdf_url)
        return None