def create_article(art_dict, max_comments):
    """
    Store an article and a random sample of its comments.

    One ``Article`` is built from the whitelisted keys of ``art_dict`` and
    saved. Then at most ``max_comments`` of the comments returned by
    ``get_elligible_comments`` are turned into ``Comment`` objects and
    inserted with a single ``bulk_create`` call.
    """
    article_fields = ("title", "body", "text", "slug", "user", "tweet_id", "url")
    comment_fields = ("text", "user_id", "tweet_id")

    article_kwargs = {}
    for field, value in art_dict.items():
        if field in article_fields:
            article_kwargs[field] = value
    article_kwargs["created_at"] = parse_date(art_dict)
    article_kwargs["metadata"] = art_dict["description"]

    art = Article(**article_kwargs)
    art.save()

    candidates = get_elligible_comments(art_dict["comments"])
    sample_size = min(max_comments, len(candidates))

    new_comments = []
    for raw_comment in random.sample(candidates, sample_size):
        comment_kwargs = {}
        for field, value in raw_comment.items():
            if field in comment_fields:
                comment_kwargs[field] = value
        comment_kwargs["created_at"] = parse_date(raw_comment)
        # The raw text copied above is replaced by its preprocessed form.
        comment_kwargs["text"] = preprocess_tweet(raw_comment["text"])

        comment = Comment(**comment_kwargs)
        comment.article = art
        new_comments.append(comment)

    Comment.objects.bulk_create(new_comments)
def post(self, request, *args, **kwargs):
    """
    Create or update an article from the submitted form data.

    Reads ``id``, ``content``, ``tags``, ``slug``, ``classification``,
    ``publish`` and ``title`` from ``request.POST``. When an article with
    the given numeric id exists it is updated (its tags are cleared first);
    otherwise a new article is created. The slug is lower-cased and its
    spaces are replaced by dashes.

    The classification is fetched with ``Classification.objects.get``, so
    an unknown id propagates whatever exception that lookup raises.

    Returns the result of ``self.render_to_response(dict())``.
    """
    aid = request.POST.get('id')
    content = request.POST.get('content')
    # A request without a ``tags`` field used to leave ``tags`` as None and
    # crash in the loop below with a TypeError; treat it as "no tags".
    tags = request.POST.get('tags') or ''
    slug = request.POST.get('slug', '')
    cid = request.POST.get('classification')
    publish = request.POST.get('publish', False)
    title = request.POST.get('title')

    slug = unicode(slug).replace(' ', '-').lower()
    # Anything that is not a plain number cannot be a primary key; id 0 is
    # used so that the lookup below simply finds nothing.
    if not unicode(aid).isdigit():
        aid = 0

    article = Article.objects.filter(id=aid).first()
    if article is not None:
        article.tags.clear()
    else:
        article = Article()

    article.publish = publish
    article.content = content
    article.classification = Classification.objects.get(id=cid)
    article.title = title
    article.slug = slug
    # The article needs to be saved before tags can be attached to it.
    article.save()

    # NOTE(review): ``tags`` is a single string here, so this loop visits it
    # one character at a time -- confirm whether ``getlist('tags')`` or a
    # delimiter-separated format is what the client actually sends.
    for itm in tags:
        if not itm:
            continue
        tag = Tag.objects.filter(id=itm).first()
        if tag is not None:
            article.tags.add(tag)
    article.save()
    return self.render_to_response(dict())
def post(self, request, *args, **kwargs):
    """
    Create or update an article from the submitted form data.

    Reads ``id``, ``content``, ``tags``, ``classification``, ``publish``
    and ``title`` from ``request.POST``. When an article with the given
    numeric id exists it is updated (its tags are cleared first); otherwise
    a new article is created.

    The classification is fetched with ``Classification.objects.get``, so
    an unknown id propagates whatever exception that lookup raises.

    Returns the result of ``self.render_to_response(dict())``.
    """
    aid = request.POST.get('id')
    content = request.POST.get('content')
    # A request without a ``tags`` field used to leave ``tags`` as None and
    # crash in the loop below with a TypeError; treat it as "no tags".
    tags = request.POST.get('tags') or ''
    cid = request.POST.get('classification')
    publish = request.POST.get('publish', False)
    title = request.POST.get('title')

    # Anything that is not a plain number cannot be a primary key; id 0 is
    # used so that the lookup below simply finds nothing.
    if not unicode(aid).isdigit():
        aid = 0

    article = Article.objects.filter(id=aid).first()
    if article is not None:
        article.tags.clear()
    else:
        article = Article()

    article.publish = publish
    article.content = content
    article.classification = Classification.objects.get(id=cid)
    article.title = title
    # The article needs to be saved before tags can be attached to it.
    article.save()

    # NOTE(review): ``tags`` is a single string here, so this loop visits it
    # one character at a time -- confirm whether ``getlist('tags')`` or a
    # delimiter-separated format is what the client actually sends.
    for itm in tags:
        if not itm:
            continue
        tag = Tag.objects.filter(id=itm).first()
        if tag is not None:
            article.tags.add(tag)
    article.save()
    return self.render_to_response(dict())
def test_article_creation(self):
    """ Saving a new article in a category adds exactly one Article row """
    count_before = Article.objects.count()

    article_fields = dict(
        url="http://www.dummy.com",
        title="Dummy title",
        category=self.category,
        owner=self.user,
    )
    created = Article(**article_fields)
    created.save()

    count_after = Article.objects.count()
    self.assertEqual(count_after, count_before + 1)
    self.assertTrue(isinstance(created, Article))
def setUp(self):
    """
    Prepare an authenticated API client, a saved category, an unsaved
    article and the response of posting article data to that category.
    """
    self.user = User.objects.create(username="******")

    api_client = APIClient()
    api_client.force_authenticate(user=self.user)
    self.client = api_client

    category = Category(
        name="Test",
        description="This is a test category",
        owner=self.user,
    )
    category.save()
    self.category = category

    # This instance is only constructed here; it is not saved.
    self.article = Article(
        url="http://www.dummy.com",
        title="Dummy title",
        category=self.category,
        owner=self.user,
    )

    self.article_data = dict(
        url="http://www.dummy.com",
        title="Dummy Title",
        description="A sample description",
        read_status=False,
        owner=self.user.id,
    )

    endpoint = '/api/v2/categories/{}/articles/'.format(self.category.id)
    self.response = self.client.post(endpoint, self.article_data, format="json")
def save_articles(article_list, source):
    """
    Save every complete entry of ``article_list[u'articles']`` as an Article.

    An entry is skipped when its author, description or image URL is None.
    ``source`` is stored unchanged on every saved article.
    """
    required = ('author', 'content', 'image_url')

    for entry in article_list[u'articles']:
        fields = dict(
            author=entry[r'author'],
            title=entry[r'title'],
            content=entry[u'description'],
            url=entry[u'url'],
            image_url=entry[u'urlToImage'],
            published_at=entry[u'publishedAt'],
            source=source,
        )

        # Incomplete entries are silently dropped.
        if any(fields[name] is None for name in required):
            continue

        record = Article(**fields)
        record.save()
def test_deserialize_article_to_article_instance(client, database):
    """
    Loading article data through ``article_schema`` yields an object whose
    title, slug, content and creation date match a directly built Article.
    """
    now: datetime = datetime.now()
    payload: dict = {
        "title": "Test Title",
        "content": "Post body",
        "date_created": now.isoformat(),
    }

    actual: Article = article_schema.load(payload).data
    expected: Article = Article(
        title="Test Title",
        content="Post body",
        date_created=now,
    )

    for attribute in ("title", "slug", "content", "date_created"):
        assert getattr(actual, attribute) == getattr(expected, attribute)
def test_serialize_article_to_dict():
    """
    GIVEN an Article object built from keyword arguments
    WHEN that object is dumped through ``article_schema``
    THEN the resulting dictionary holds the object's data, including the
    slug, which is not passed in explicitly
    """
    now: datetime = datetime.now(timezone.utc)
    article: Article = Article(
        title="Test Title",
        content="Post body",
        date_created=now,
    )

    actual: dict = article_schema.dump(article).data
    expected: dict = {
        "title": "Test Title",
        "slug": "test-title",
        "content": "Post body",
        "date_created": now.isoformat(),
    }

    for key, value in expected.items():
        assert actual[key] == value
def create(self, validated_data):
    """
    Create an article authored by the requesting user and link its tags.

    ``validated_data`` may carry a ``tags`` list of dicts, each read via
    its ``name`` key. A tag is looked up by name and created through
    ``create_tag`` when no match is found, then linked to the article with
    an ``ArticleTag`` row.

    Returns the saved ``Article``.
    """
    # ``tags`` is optional: fall back to an empty list so that a payload
    # without tags no longer fails while iterating over None.
    article_tags = validated_data.pop('tags', None) or []

    article = Article(**validated_data)
    article.author = self.context['request'].user
    article.save()

    for request_tag in article_tags:
        tag_name = request_tag.get('name')
        # One query instead of exists() followed by a second filter()[0].
        tag = Tag.objects.filter(name=tag_name).first()
        if tag is None:
            tag = create_tag(request_tag)
        # objects.create() already persists the row; no extra save() needed.
        ArticleTag.objects.create(article=article, tag=tag)

    article.save()
    return article
def test_create_article():
    """
    GIVEN the Article model
    WHEN title, content and creation time are passed to its constructor
    THEN the instance exposes that title and content plus a derived slug.
    """
    # The timestamp is created here and shared by both sides; a timestamp
    # generated inside the model would differ from the expected one.
    now: datetime = datetime.now(timezone.utc)

    actual: Article = Article(
        title="Test Title",
        content="Post body",
        date_created=now,
    )
    expected: dict = dict(
        title="Test Title",
        slug="test-title",
        content="Post body",
        date_created=now,
    )

    # NOTE(review): ``date_created`` is prepared above but never asserted.
    assert actual.title == expected["title"]
    assert actual.slug == expected["slug"]
    assert actual.content == expected["content"]
def handle(self, *args, **options):
    """
    Import blog posts from the remote migrate endpoint as published articles.

    The JSON response is expected to hold the posts under ``body.blog``.
    A post whose caption already exists as an article title is skipped.
    Each remaining post is stored as a published ``Article`` under the
    classification named in the post, which is created when missing.
    """
    res = requests.get('http://www.rapospectre.com/migrate')
    json_data = json.loads(res.content)
    blog_list = json_data.get('body').get('blog')

    for itm in blog_list:
        title = itm.get('caption')
        if Article.objects.filter(title=title).exists():
            continue

        c_name = itm.get('classification').get('c_name')
        classifi = Classification.objects.filter(title=c_name).first()
        if classifi is None:
            # Model.save() returns None. The previous code assigned that
            # result, lost the new classification and then fell back to the
            # first classification in the table. Keep the instance instead.
            classifi = Classification(title=c_name)
            classifi.save()

        create_time = datetime.datetime.strptime(
            itm.get('create_time'), '%Y-%m-%d %H:%M:%S')
        # NOTE(review): replace(tzinfo=...) can pick a wrong UTC offset for
        # some timezone implementations -- confirm whether
        # django.utils.timezone.make_aware should be used here.
        create_time = create_time.replace(tzinfo=get_current_timezone())

        Article(
            title=title,
            content=itm.get('content'),
            publish=True,
            create_time=create_time,
            classification=classifi,
            views=int(itm.get('read_count')),
        ).save()
def get_aiticle(request):
    """Instantiate an empty Article and answer with the text "success"."""
    # The instance is built but neither saved nor returned.
    Article()
    return HttpResponse("success")
def create(self, validated_data):
    """Build an Article from ``validated_data``, save it and return it."""
    instance = Article(**validated_data)
    instance.save()
    return instance
def get_article_from_authors(author_name, token):
    """
    Fetch open-access catalog entries for an author and store them.

    Queries the Mendeley catalog search for ``author_name`` using ``token``
    as bearer token (one retry after 5 seconds on a connection error).
    For every result that has ``identifiers``, the listed authors are
    looked up or created, and the article is looked up by id or created.

    Returns a list with the primary keys of the articles that exist after
    processing, or None when ``author_name`` is "Anonymous" or the stored
    author is already marked ``done``.
    """
    if author_name == "Anonymous":
        return

    import time

    import requests
    from django.core.exceptions import ObjectDoesNotExist
    from django.db import DataError

    from api.models import Article, Authors

    if Authors.objects.get(name=author_name).done:
        return

    headers = {"Authorization": 'Bearer ' + token}
    url = f'https://api.mendeley.com/search/catalog?author={author_name}&view=all&open_access=True&limit=100'
    try:
        response = requests.get(url, headers=headers).json()
    except requests.exceptions.ConnectionError:
        print("okay_bitxh")
        time.sleep(5)
        response = requests.get(url, headers=headers).json()

    ids = []
    for index, res in enumerate(response):
        print(index)
        if 'identifiers' not in res:
            continue

        authors = []
        if 'authors' in res:
            for person in res['authors']:
                first = person.get('first_name') or ''
                last = person.get('last_name') or ''
                # Entries without any usable name part share one
                # "Anonymous" author.
                name = first + " " + last if first or last else 'Anonymous'
                author, _ = Authors.objects.get_or_create(
                    name=name, defaults={'done': False})
                authors.append(author)
        else:
            author, _ = Authors.objects.get_or_create(
                name='Anonymous', defaults={'done': False})
            authors.append(author)

        try:
            article = Article.objects.get(id=res['id'])
        except ObjectDoesNotExist:
            identifiers = res['identifiers']
            article = Article(
                title=res['title'],
                type=res['type'],
                id=res['id'],
                year=res.get('year', 2000),
                source=res.get('source', "Not specified"),
                publisher=res.get('publisher', 'Anonymous'),
                identifiers=identifiers,
                link=res['link'],
                pdf=res.get('pdf'),
                abstract=get_abstract(identifiers['doi'])
                if 'doi' in identifiers else "",
            )
            try:
                article.save()
            except DataError:
                # Best effort: a record the database rejects is skipped.
                # Its id is no longer reported, because no such row exists.
                continue

        # add() is idempotent, so attaching the authors to an article that
        # already existed is harmless.
        article.authors.add(*authors)
        ids.append(article.pk)
    return ids
def get_data_by_query(token, query, limit=100):
    """
    Fetch open-access catalog entries for a search query and store them.

    Queries the Mendeley catalog search for ``query`` (at most ``limit``
    results) using ``token`` as bearer token, with one retry after
    5 seconds on a connection error. For every result that has
    ``identifiers``, the listed authors are looked up or created, and the
    article is looked up by id or created.

    Returns a list with the primary keys of the processed articles.
    """
    import time

    import requests
    from django.core.exceptions import ObjectDoesNotExist

    from api.models import Article, Authors

    time.sleep(0.1)
    headers = {"Authorization": 'Bearer ' + token}
    url = f'https://api.mendeley.com/search/catalog?query={query}&open_access=True&limit={limit}'
    try:
        response = requests.get(url, headers=headers).json()
    except requests.exceptions.ConnectionError:
        time.sleep(5)
        response = requests.get(url, headers=headers).json()

    ids = []
    for index, res in enumerate(response):
        print(index, "ok")
        if 'identifiers' not in res:
            continue

        authors = []
        if 'authors' in res:
            for person in res['authors']:
                first = person.get('first_name') or ''
                last = person.get('last_name') or ''
                # Entries without any usable name part share one
                # "Anonymous" author.
                name = first + " " + last if first or last else 'Anonymous'
                # ``done`` belongs in ``defaults``. As a lookup argument it
                # made an author already marked done look missing, which
                # triggered a second insert for the same name.
                author, _ = Authors.objects.get_or_create(
                    name=name, defaults={'done': False})
                authors.append(author)
        else:
            author, _ = Authors.objects.get_or_create(
                name='Anonymous', defaults={'done': False})
            authors.append(author)

        try:
            article = Article.objects.get(id=res['id'])
        except ObjectDoesNotExist:
            identifiers = res['identifiers']
            abstract = get_abstract(
                identifiers['doi']) if 'doi' in identifiers else ""
            article = Article(
                title=res['title'],
                type=res['type'],
                id=res['id'],
                year=res.get('year', 2000),
                source=res.get('source', "Not specified"),
                publisher=res.get('publisher', 'Anonymous'),
                identifiers=identifiers,
                link=res['link'],
                pdf=res.get('pdf'),
                keywords=res["keywords"][:6] if "keywords" in res else None,
                abstract=abstract,
            )
            article.save()

        # add() is idempotent, so attaching the authors to an article that
        # already existed is harmless.
        article.authors.add(*authors)
        ids.append(article.pk)
    return ids
def get_data(token):
    """
    Fetch and store open-access catalog entries for every subdiscipline.

    Iterates over all values of ``settings.SUBDISCIPLINES`` and, for each
    subdiscipline name, queries the Mendeley catalog search (100 results,
    one retry after 5 seconds on a connection error) using ``token`` as
    bearer token. For every result that has ``identifiers``, the listed
    authors are looked up or created, and the article is looked up by id
    or created.
    """
    import time

    import requests
    from django.conf import settings
    from django.core.exceptions import ObjectDoesNotExist

    from api.models import Article, Authors

    time.sleep(0.1)
    headers = {"Authorization": 'Bearer ' + token}
    limit = 100

    for subdisciplines in settings.SUBDISCIPLINES.values():
        for sub in subdisciplines:
            query = sub.replace(" ", "+").lower()
            print(query)
            url = f'https://api.mendeley.com/search/catalog?query={query}&view=all&open_access=True&limit={limit}'
            try:
                response = requests.get(url, headers=headers).json()
            except requests.exceptions.ConnectionError:
                print("okay_bitxh")
                time.sleep(5)
                response = requests.get(url, headers=headers).json()

            for index, res in enumerate(response):
                print(index)
                if 'identifiers' not in res:
                    continue

                authors = []
                if 'authors' in res:
                    for person in res['authors']:
                        first = person.get('first_name') or ''
                        last = person.get('last_name') or ''
                        # Entries without any usable name part share one
                        # "Anonymous" author.
                        if first or last:
                            name = first + " " + last
                        else:
                            name = 'Anonymous'
                        # ``done`` belongs in ``defaults``. As a lookup
                        # argument it made an author already marked done
                        # look missing, which triggered a second insert
                        # for the same name.
                        author, _ = Authors.objects.get_or_create(
                            name=name, defaults={'done': False})
                        authors.append(author)
                else:
                    author, _ = Authors.objects.get_or_create(
                        name='Anonymous', defaults={'done': False})
                    authors.append(author)

                try:
                    article = Article.objects.get(id=res['id'])
                except ObjectDoesNotExist:
                    identifiers = res['identifiers']
                    abstract = get_abstract(
                        identifiers['doi']) if 'doi' in identifiers else ""
                    article = Article(
                        title=res['title'],
                        type=res['type'],
                        id=res['id'],
                        year=res.get('year', 2000),
                        source=res.get('source', "Not specified"),
                        publisher=res.get('publisher', 'Anonymous'),
                        identifiers=identifiers,
                        link=res['link'],
                        pdf=res.get('pdf'),
                        keywords=res["keywords"][:6]
                        if "keywords" in res else None,
                        abstract=abstract,
                    )
                    article.save()

                # add() is idempotent, so attaching the authors to an
                # article that already existed is harmless.
                article.authors.add(*authors)
def create(self, validated_data, author_id):
    """
    Build an Article from ``validated_data``, assign ``author_id`` to it,
    save it and return the saved instance.
    """
    instance = Article(**validated_data)
    # Assigned after construction, so it overrides any value that came in
    # through ``validated_data``.
    instance.author_id = author_id
    instance.save()
    return instance
'url': url, 'body_text': str(body_text.text).strip().replace('\n', '') }) else: errors.append({ 'error': f'Не верный статускод {response.status_code} возможно изменился url' }) if __name__ == '__main__': """ Получаем все статьи, если статьи получены, то удаляем старые и сохраняем новые Поскольку время на реализацию ограничено, то сделано именно так. Если есть ошибки, то сохраняем их в базу. """ parser() if articles: all_article = Article.objects.all() all_article.delete() for article_item in articles: article = Article(**article_item) article.save() if errors: for error_item in errors: error = ParserError(**error_item) error.save()