def test_basics(self):
    """Check `ensure_protocol` against the shared fixtures and explicit protocols."""
    # Every fixture pair maps an input url to its expected normalized form.
    for raw_url, expected in TESTS:
        assert ensure_protocol(raw_url) == expected

    # A custom protocol is prepended when the url carries none.
    with_ftp = ensure_protocol('lemonde.fr?utm_hp_ref=test', 'ftp')
    assert with_ftp == 'ftp://lemonde.fr?utm_hp_ref=test'

    # A url that already has a protocol is returned unchanged.
    already_https = ensure_protocol('https://lemonde.fr?utm_hp_ref=test', 'ftp')
    assert already_https == 'https://lemonde.fr?utm_hp_ref=test'

    # The protocol may also be given with its trailing '://'.
    with_http = ensure_protocol('lemonde.fr?utm_hp_ref=test', 'http://')
    assert with_http == 'http://lemonde.fr?utm_hp_ref=test'
def crowdtangle_posts_by_id_action(namespace, output_file):
    """Enrich a CSV of Facebook post urls with CrowdTangle post data.

    Reads urls from column `namespace.column` of `namespace.file`, fetches
    the matching post through the CrowdTangle client and writes each input
    row to `output_file`, followed by the post data when one was retrieved.
    Rows whose url is empty, is not a Facebook post url, or yields no post
    id are written back without additional data.
    """
    client = CrowdTangleClient(namespace.token, rate_limit=namespace.rate_limit)

    # Number of rows reported as already processed when resuming.
    already_done = 0

    def listener(event, row):
        nonlocal already_done

        if event == 'resume.input':
            already_done += 1

    enricher = casanova.enricher(
        namespace.file,
        output_file,
        keep=namespace.select,
        add=CROWDTANGLE_POST_CSV_HEADERS,
        resumable=namespace.resume,
        listener=listener
    )

    loading_bar = tqdm(
        desc='Retrieving posts',
        dynamic_ncols=True,
        total=namespace.total,
        unit=' posts'
    )

    # Account for the rows counted by the resume listener.
    loading_bar.update(already_done)
    loading_bar_context = LoadingBarContext(loading_bar)

    try:
        for row, url in enricher.cells(namespace.column, with_rows=True):
            with loading_bar_context:
                url = url.strip()
                post_id = None

                # Only a non-empty Facebook post url can yield a post id.
                if url:
                    url = ensure_protocol(url)

                    if is_facebook_post_url(url):
                        post_id = facebook.post_id_from_url(url)

                if post_id is None:
                    enricher.writerow(row)
                    continue

                post = client.post(post_id, format='csv_row')
                enricher.writerow(row, post)

    except CrowdTangleInvalidTokenError:
        die([
            'Your API token is invalid.',
            'Check that you indicated a valid one using the `--token` argument.'
        ])
def domain_from_url(url):
    """Return the network location of `url` without any userinfo prefix.

    The url is first passed through `ensure_protocol` so that `urlparse`
    can populate `netloc`. Whatever precedes the last '@' of the netloc
    (e.g. `user:password`) is dropped; a port, if present, is kept since
    it is part of the netloc.
    """
    netloc = urlparse(ensure_protocol(url)).netloc

    # Split on the LAST '@': userinfo may itself contain '@' characters, and
    # rpartition leaves the whole netloc in the tail when there is no '@'.
    return netloc.rpartition('@')[2]
def create(self, request):
    """Create a meme from the `name`, `url` and `caption` of the request.

    Responses:
        201 with ``{'id': <str>}`` when the meme was saved.
        400 when the url is missing or invalid, the name contains
            characters other than letters, digits, underscore and hyphen,
            or the serializer rejects the data.
        409 when a meme with the same url, name and caption already exists.
    """
    # The url is mandatory.
    raw_url = request.data.get('url')
    if raw_url is None:
        return Response({"message": "URL field cannot be blank"},
                        status=status.HTTP_400_BAD_REQUEST)

    # Add a scheme when missing, then reject values that do not form a valid url.
    schemed_url = ensure_protocol(raw_url, protocol='https')
    if not is_url(schemed_url):
        return Response({"message": "Enter a valid url"},
                        status=status.HTTP_400_BAD_REQUEST)

    # The name may only contain letters, digits, underscore and hyphen.
    # fullmatch is used because `$` with re.match also matches right before
    # a trailing newline, which would let "name\n" through.
    name = request.data.get('name')
    if name is not None and (
        not isinstance(name, str)
        or re.fullmatch(r"[A-Za-z0-9_-]*", name) is None
    ):
        return Response(
            {"message": "Name can contain only letters,numbers,underscore and hyphen"},
            status=status.HTTP_400_BAD_REQUEST)

    # Map the request data to the serializer to validate required fields.
    serializer = serializers.MemeSerializer(data={
        "name": name,
        "url": schemed_url,
        "caption": request.data.get("caption"),
    })

    if not serializer.is_valid():
        # Return the exact serialization errors with a bad request status.
        return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)

    creator = serializer.data.get('name')
    caption = serializer.data.get('caption')

    # Refuse duplicates; exists() avoids loading the matching rows.
    duplicate = Meme.objects.filter(
        url=schemed_url, name=creator, caption=caption).exists()
    if duplicate:
        return Response({'message': 'This meme already exists'},
                        status=status.HTTP_409_CONFLICT)

    # Use one timestamp so creation and last-update times agree on a new row.
    now = timezone.now()
    obj = Meme(caption=caption, url=schemed_url, name=creator,
               creationDateTime=now, creationDate=date.today(),
               lastUpdate=now)
    obj.save()

    # Return the id of the meme that was just created.
    return Response({'id': str(obj.id)}, status=status.HTTP_201_CREATED)
def payloads():
    """Yield one `FetchWorkerPayload` per item of `iterator`.

    `iterator`, `key` and `http` are not defined here and are read from
    the surrounding scope. Items whose url is falsy produce a payload with
    `url=None`; other urls are stripped and given a protocol first.
    """
    for item in iterator:
        url = key(item) if key is not None else item

        if url:
            # Url cleanup
            url = ensure_protocol(url.strip())
        else:
            url = None

        yield FetchWorkerPayload(http=http, item=item, url=url)
def payloads_iter(iterator, key=None):
    """Yield one `FetchWorkerPayload` per item of `iterator`.

    The url is the item itself, or `key(item)` when `key` is given. A falsy
    url produces a payload whose `domain` and `url` are both None; otherwise
    the url is stripped, given a protocol, and its domain name is attached.
    """
    for item in iterator:
        raw_url = item if key is None else key(item)

        if raw_url:
            # Url cleanup
            cleaned_url = ensure_protocol(raw_url.strip())
            domain = get_domain_name(cleaned_url)
        else:
            cleaned_url = None
            domain = None

        yield FetchWorkerPayload(item=item, domain=domain, url=cleaned_url)
def partial_update(self, request, pk=None):
    """Update the url and/or caption of the meme identified by `pk`.

    Responses:
        204 when the request was applied.
        400 when `pk` is not a non-negative integer, a `name` is supplied,
            neither url nor caption is supplied, the url is invalid, or
            the serializer rejects the data.
        404 when no meme has this id.
    """
    # Reject ids that are missing, non-numeric or negative.
    try:
        meme_id = int(pk)
    except (TypeError, ValueError):
        return Response({"message": "Enter positive number"},
                        status=status.HTTP_400_BAD_REQUEST)
    if meme_id < 0:
        return Response({"message": "Enter positive number"},
                        status=status.HTTP_400_BAD_REQUEST)

    # Fetch the meme; first() returns None when there is no match.
    obj = Meme.objects.filter(id=meme_id).first()
    if obj is None:
        return Response(status=status.HTTP_404_NOT_FOUND)

    if request.data.get('name') is not None:
        return Response({"message": "Creator name cannot be changed!!"},
                        status=status.HTTP_400_BAD_REQUEST)

    url = request.data.get('url')
    caption = request.data.get('caption')
    if url is None and caption is None:
        return Response({"message": "Both url and caption cannot be none"},
                        status=status.HTTP_400_BAD_REQUEST)

    # If only the caption is supplied, only the caption is updated.
    if url is None:
        obj.caption = caption
        obj.lastUpdate = timezone.now()
        obj.save()
        return Response(status=status.HTTP_204_NO_CONTENT)

    # Add a scheme when missing and check that a valid url is formed.
    schemed_url = ensure_protocol(url, protocol='https')
    if not is_url(schemed_url):
        return Response({"message": "Enter a valid url"},
                        status=status.HTTP_400_BAD_REQUEST)

    # Validate whichever of url / caption was supplied.
    payload = {"url": schemed_url}
    if caption is not None:
        payload["caption"] = caption
    serializer = serializers.MemeUpdateSerializer(data=payload, partial=True)

    if not serializer.is_valid():
        # Validation failures are client errors, as in `create`.
        return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)

    new_caption = serializer.data.get('caption')
    new_url = serializer.data.get('url')

    # Track real changes so lastUpdate is only bumped when a field differs.
    changed = False
    if new_caption is not None and new_caption != obj.caption:
        obj.caption = new_caption
        changed = True
    if new_url is not None and new_url != obj.url:
        obj.url = new_url
        changed = True
    if changed:
        obj.lastUpdate = timezone.now()

    obj.save()
    return Response(status=status.HTTP_204_NO_CONTENT)