def gen_datetime(min_date=None, max_date=None):
    """Yield an endless stream of random datetimes uniformly distributed
    between *min_date* and *max_date*.

    BUG FIX: the original signature used ``now_add_days(-365)`` /
    ``now_add_days(365)`` directly as defaults, so they were evaluated
    exactly once at import time and the one-year window never moved for
    the lifetime of the process.  Defaults are now resolved per call.

    When ``settings.USE_TZ`` is true the yielded datetimes are tagged
    with UTC (note: only the tzinfo is attached; no conversion happens).
    """
    if min_date is None:
        min_date = now_add_days(-365)
    if max_date is None:
        max_date = now_add_days(365)
    min_ts = time.mktime(min_date.timetuple())
    max_ts = time.mktime(max_date.timetuple())
    for __ in itertools.count():
        random_time = min_ts + random.random() * (max_ts - min_ts)
        # mktime(localtime(x)) round-trip truncates to whole seconds.
        value = tz_datetime.fromtimestamp(time.mktime(time.localtime(random_time)))
        if settings.USE_TZ:
            yield value.replace(tzinfo=utc)
        else:
            yield value
class FieldType:
    """Maps a Django model field class to a protobuf scalar type and
    provides value (de)serializers for field types whose wire format
    differs from their Python representation (files → URL strings,
    dates/datetimes → epoch milliseconds).
    """

    django_to_proto_type = {
        models.CharField: 'string',
        models.FloatField: 'double',
        models.IntegerField: 'int32',
        models.AutoField: 'int32',
        models.BigIntegerField: 'int64',
        models.PositiveIntegerField: 'uint32',
        models.PositiveBigIntegerField: 'uint64',
        models.FileField: 'string',
        models.DateTimeField: 'uint64',
        models.DateField: 'uint64',
        models.ForeignKey: 'uint16',
        models.BooleanField: 'bool',
    }
    # Python value -> wire value, keyed by Django field class.
    serializers = {
        models.FileField: lambda file: file.url,
        models.DateTimeField: lambda dt: int(dt.timestamp() * 1000),
        models.DateField: lambda dt: int(datetime(dt.year, dt.month, dt.day).timestamp() * 1000),
    }
    # Wire value -> Python value, keyed by Django field class.
    # BUG FIX: the FileField entry was ``lambda file: file.url`` (copied from
    # serializers) but the incoming wire value is already a URL string.
    deserializers = {
        models.FileField: lambda url: url,
        models.DateTimeField: lambda ts: datetime.fromtimestamp(ts / 1000.0),
        models.DateField: lambda ts: datetime.fromtimestamp(ts / 1000.0).date(),
    }

    def __init__(self, django_type):
        self.django_type = django_type
        # Kept under the original (misspelled) name for backward
        # compatibility with existing callers.
        self.djange_type = django_type
        self.proto_type = FieldType.django_to_proto_type[django_type]

    def serialize(self, value):
        """Convert *value* to its wire representation; None on failure."""
        try:
            return FieldType.serializers.get(self.django_type, lambda x: x)(value)
        except Exception:
            print('error serializing', value)
            return None

    def deserialize(self, value):
        """Convert a wire value back to its Python representation.

        BUG FIX: the original looked up ``deserializers`` with
        ``self.proto_type`` (a string such as 'uint64'), but the dict is
        keyed by Django field classes, so no deserializer ever matched
        and timestamps were returned raw.
        """
        try:
            return FieldType.deserializers.get(self.django_type, lambda x: x)(value)
        except Exception:
            print('error deserializing', value)
            return None
def default(self, o):
    """Extend JSON encoding with ``time.struct_time`` support.

    A struct_time is converted to a datetime and re-dispatched through
    this method so the existing datetime handling (see "Date Time String
    Format" in the ECMA-262 specification) applies; anything else is
    delegated to the base encoder.
    """
    from time import struct_time, mktime

    if not isinstance(o, struct_time):
        return super(CustomJSONEncoder, self).default(o)
    return self.default(datetime.fromtimestamp(mktime(o)))
def rss(source_url, newer_then=None):
    """Fetch the feed at *source_url* and return its entries as dicts
    with ``html`` (the entry summary) and ``published`` (a datetime).

    If *newer_then* is given, only entries published strictly after it
    are returned.
    """
    feed = _rss_parse(source_url)
    entries = []
    for entry in feed.entries:
        entries.append(dict(
            html=entry.summary,
            published=tz_dt.fromtimestamp(mktime(entry.published_parsed)),
        ))
    if newer_then:
        entries = [entry for entry in entries if entry['published'] > newer_then]
    return entries
def get_rank_board_datas(self):
    """Return rolling-rank data for the assignment (stripped-down
    contest-server version).

    The client passes its last refresh time as a float ``time`` query
    parameter; when present, only judge statuses newer than that time
    are returned (incremental update), otherwise the full initial data
    set is sent.
    """
    import time
    from django.utils.timezone import datetime
    asgn = self.asgn
    # Problems ordered by their index within the assignment.
    aproblem_list = asgn.problems.order_by("index")
    # Lookup table: problem entity id -> display index.
    apindex_list = {}
    for aproblem in aproblem_list:
        apindex_list[aproblem.entity.id] = aproblem.index
    parser = ParamsParser(self._request)
    # Timestamp of the client's last refresh (0 means "initial load").
    last_time = parser.get_float('time', 0)
    try:
        last_time = datetime.fromtimestamp(last_time)
    except Exception:
        # Out-of-range or otherwise invalid timestamp: fall back to the
        # initial-load branch below.
        last_time = 0
    user_list = {}
    # Idiom fix: isinstance() instead of ``type(x) is datetime``.
    if isinstance(last_time, datetime):
        # A refresh time was supplied: stream only newer judge statuses.
        judge_status = asgn.judge_status.filter(
            create_time__gt=last_time).order_by("id")
    else:
        # No usable time: send the full initial data set.
        judge_status = asgn.judge_status.filter(
            create_time__lte=datetime.now())
    reports = EducationModel.AsgnReport.objects.filter(asgn=asgn)
    for report in reports:
        user_list[str(report.author.id)] = report.json(items=[
            'id', 'author', 'author__id', 'author__nickname',
            'author__username', 'author__realname', 'author__headimg',
            'author__sex', 'start_time'
        ])
    judge_status_list = [{
        "id": status.id,
        "problem_id": status.problem_id,
        "user_id": status.author_id,
        "flag": status.flag,
        "timestamp": int(status.create_time.timestamp())
    } for status in judge_status]
    return {
        "problem_indexs": apindex_list,
        "problem_list": [aproblem.entity.id for aproblem in aproblem_list],
        "judge_status": judge_status_list,
        "user_list": user_list,
        "nowtime": time.time()
    }
def main():
    """Read the 'bildeliste2' file listing (``ls``-style columns) and
    bucket every image file by (year, issue).

    Year/issue are taken from the path when it matches one of the
    ``./YYYY/NN/…`` patterns; otherwise they fall back to the file's
    mtime year (and issue '00' when no two-digit issue is found in the
    path).  Duplicate filenames are counted but not stored.
    """
    with open('bildeliste2') as f:
        images = f.readlines()
    # matchvalue = None
    # shuffle(images)
    # images = images[:100]
    imagedict = {}
    duplicates = 0
    for img in images:
        # Skip ads, banners and other non-editorial directories.
        if re.search('/(reklame|banner|elm|2014/2013|slettmeg)/', img):
            continue
        year = issue = None
        # Listing columns: size is field 0, mtime (epoch secs) field 5,
        # path fields 6+.  NOTE(review): ''.join() concatenates path
        # fragments without spaces — paths containing spaces lose them;
        # confirm whether that is intended.
        fields = img.split()
        filesize_in_bytes = int(fields[0])
        unix_timestamp = int(fields[5])
        filedate = datetime.fromtimestamp(unix_timestamp)
        filepath = ''.join(fields[6:])
        filename = filepath.split('/')[-1]
        # logger.debug("{} {} {}".format(size, unix_timestamp, path))
        # Try the most specific path pattern first (issue repeated in the
        # filename), then progressively looser ones.
        year_issue_match = (
            re.match(r'\./(?P<year>\d{4})/(?P<issue>\d{1,2})/(?P=issue)\D',
                     filepath) or
            re.match(r'\./(?P<year>\d{4})/(?P<issue>\d{1,2})/',
                     filepath) or
            re.match(r'\./(?P<year>\d{4})/(?P<issue>\d{2})[^\d/]',
                     filepath) or
            None)
        if year_issue_match:
            # matchvalue = 1
            year = year_issue_match.group('year')
            issue = year_issue_match.group('issue')
        else:
            # No year in the path: look for a bare two-digit issue and
            # use the file's mtime year.
            issue_match = re.search(r'\/(?P<issue>\d{2})[^/\d]', filepath)
            if issue_match:
                # matchvalue = 2
                issue = issue_match.group('issue')
                year = filedate.year
            else:
                # matchvalue = 3
                year = filedate.year
                issue = '00'
        # logger.debug('year: {}, issue: {}, file: {}'.format(issuematch.group('year'), issuematch.group('issue'), filename))
        if year and issue:
            image_file = ImageFile(filepath, filesize_in_bytes, filedate,
                                   issue, year)
            if filename in imagedict:
                # old_filepath = imagedict[filename]
                duplicates += 1
            else:
                imagedict[filename] = image_file
        else:
            msg = 'No match {}'.format(filepath)
            logger.debug(msg)
    msg = 'all: {0} placed: {1} duplicates: {2}'.format(
        len(images), len(imagedict), duplicates)
    logger.debug(msg)
def save_report_date(request):
    """Persist the last report datetime for a counteragent.

    Staff-only: non-staff users get a permission-denied message.  The
    POSTed 'datetime' is an epoch value whose fractional part (after the
    first '.') is discarded.  Returns a dict with a 'result' message.
    """
    user = User.objects.get(username=request.user)
    if not user.is_staff:
        return {'result': "У вас нет прав на сохранение даты"}
    try:
        counteragent = Counteragent.objects.get(
            alias=request.POST['counteragent'])
        raw_seconds = request.POST['datetime'].split('.')[0]
        date_time = datetime.fromtimestamp(int(raw_seconds))
        LastReportDatetime(counteragent=counteragent,
                           date_time=date_time).save()
        return {'result': "Saved"}
    except Exception:
        return {'result': "Error while saving"}
def random_datetime(first_datetime, second_datetime):
    """Return a timezone-aware random datetime between the two given
    datetimes (argument order does not matter).

    Resolution is whole seconds: both bounds are truncated to integer
    timestamps before drawing.
    """
    if second_datetime < first_datetime:
        first_datetime, second_datetime = second_datetime, first_datetime
    lo = int(first_datetime.timestamp())
    hi = int(second_datetime.timestamp())
    chosen = datetime.fromtimestamp(randint(lo, hi))
    return make_aware(chosen)
def get_feedly_article(request_content):
    """Create a Post from a single Feedly entry described by *request_content*.

    Fetches the entry via the Feedly API, resolves (or creates) its
    NewsAggregator, and creates the Post unless a post with the same
    title already exists.  ``request_content`` is read for 'entryId',
    'title', 'author', 'publishedTimestamp' (milliseconds) and
    'visualUrl' — presumably supplied by a Feedly webhook; confirm
    against the caller.
    """
    from posts.models import FeedlyAPISettings, Post, NewsAggregator
    from django.contrib.auth import get_user_model
    from posts.utils import get_favicon
    feedly_settings = FeedlyAPISettings.get_solo()
    feedly = feedly_settings.get_client()
    # get_entry returns a list; the single requested entry is element 0.
    article = feedly.get_entry(feedly_settings.FEEDLY_API_ACCESS_TOKEN,
                               request_content.get('entryId'))[0]
    try:
        # A post with this title already exists: nothing to do.
        Post.objects.get(title=request_content.get('title', None))
    except Post.DoesNotExist:
        origin_url = urlparse(article.get('origin').get('htmlUrl'))
        if 'google' in origin_url.netloc:
            # Google News proxies the origin: substitute the article's own
            # URL and derive the source title from its hostname.
            article['origin']['htmlUrl'] = article.get('alternate')[0]['href'] if article.get('alternate', None) else article.get('canonicalUrl')
            article['origin']['title'] = urlparse(article['origin']['htmlUrl']).netloc
        try:
            news_aggregator = NewsAggregator.objects.get(
                name=article.get('origin').get('title'))
        except NewsAggregator.DoesNotExist:
            # First article from this source: create the aggregator and
            # try to use its favicon as the logo.
            na_url = urlparse(article['origin']['htmlUrl'])
            news_aggregator = NewsAggregator.objects.create(
                name=article.get('origin').get('title'),
                url=na_url.scheme + '://' + na_url.netloc)
            temp_image = get_favicon(na_url.geturl())
            if temp_image:
                news_aggregator.logo.save(
                    origin_url.netloc + "_logo",
                    File(temp_image, name=origin_url.netloc + "_logo"))
            news_aggregator.save()
        if request_content.get('title', False) and news_aggregator:
            # publishedTimestamp is in milliseconds.
            post = Post.objects.create(
                submitter=get_user_model().objects.filter(is_superuser=True)[0],
                author=request_content.get('author', None),
                title=request_content.get('title'),
                news_aggregator=news_aggregator,
                submit_time=datetime.fromtimestamp(
                    request_content.get('publishedTimestamp') / 1000.0),
                url=article['canonicalUrl'] if article.get('canonicalUrl', None)
                    else article.get('alternate')[0]['href'],
                feedly_engagement=article.get('engagement', 100))
            article_image = request_content.get('visualUrl', None)
            if article_image:
                post.image_url = article_image
                post.save()
def update_items(self, items, item_data):
    """Populate the single feed item in *items* from a parsed RSS entry.

    Exactly one item is expected; more than one raises ValueError.
    Timestamps are popped off before the entry is normalised so they do
    not end up in the serialized source_content.  The item is not saved.
    """
    if len(items) > 1:
        raise ValueError(
            "There should only ever be exactly one-to-one "
            "relationships between items in RSS feeds."
        )
    target = items[0]
    published = item_data.pop("published_parsed", None)
    updated = item_data.pop("updated_parsed", None)
    item_data = self.prepare_item_content(item_data)
    unescape = HTMLParser().unescape
    target.title = unescape(item_data["title"])
    target.displayed_from = datetime.fromtimestamp(mktime(published or updated))
    target.source_content = json.dumps(item_data, sort_keys=True)
    link = item_data.get("link")
    entry_id = item_data.get("id")
    target.source_url = link or entry_id
    target.source_id = entry_id or link
    target.last_read_at = now()
def update_items(self, items, item_data):
    """Populate and save the single feed item in *items* from a parsed
    RSS entry.

    Exactly one item is expected; more than one raises ValueError.
    Timestamps are popped off before the entry is normalised so they do
    not end up in the serialized source_content.
    """
    if len(items) > 1:
        raise ValueError('There should only ever be exactly one-to-one '
                         'relationships between items in RSS feeds.')
    target = items[0]
    published = item_data.pop('published_parsed', None)
    updated = item_data.pop('updated_parsed', None)
    item_data = self.prepare_item_content(item_data)
    target.title = HTMLParser().unescape(item_data['title'])
    target.displayed_from = datetime.fromtimestamp(
        mktime(published or updated))
    target.source_content = json.dumps(item_data, sort_keys=True)
    target.source_url = item_data.get('link') or item_data.get('id')
    target.last_read_at = now()
    target.save()
def build_table_row(request, obj, admin_class):
    """Render the ``<td>`` cells of one admin change-list row for *obj*.

    Columns come from ``admin_class.list_display``; choice fields render
    their display label, datetimes are formatted as local time, and an
    edit link plus any ``action_buttons`` cells are appended.
    """
    row_html = ''
    for strcolumn in admin_class.list_display:
        field_obj = obj._meta.get_field(strcolumn)
        if field_obj.choices:
            # Choice fields render their human-readable label.
            tddata = getattr(obj, 'get_%s_display' % strcolumn)()
        else:
            tddata = getattr(obj, strcolumn)
        # Idiom fix: isinstance() instead of comparing type(...).__name__.
        if isinstance(tddata, datetime):
            # Round-trip through a timestamp to convert (possibly
            # tz-aware) values into naive local time before formatting.
            tddata = datetime.fromtimestamp(tddata.timestamp())
            tddata = tddata.strftime("%Y-%m-%d %H:%M:%S")
        row_html += "<td>%s</td>" % tddata
    editUrl = "%s/%s/change" % (request.path, obj.id)
    row_html += "<td><a href='%s'><span class='fa fa-edit'></span></a></td>" % editUrl
    for actionButton in admin_class.action_buttons:
        actionButtonEle = getattr(obj, actionButton)(obj.id)
        row_html += "<td>%s</td>" % actionButtonEle
    return mark_safe(row_html)
def entry_to_messages(self, entry):
    """Convert one feedparser entry into a Message.

    The message text joins title, content, summary and link (skipping
    missing parts) with CRLF.  The message date comes from
    ``published_parsed``/``updated_parsed`` (UTC); entries with neither
    fall back to the current time.
    """
    content_r = entry.get('content')
    if isinstance(content_r, list):
        content_join = []
        for c in content_r:
            if isinstance(c, dict):
                content_join.append(c.get('value'))
        content = "\r\n".join(content_join) if any(content_join) else None
    else:
        content = None
    title = entry.get('title')
    if not title:
        title = None
    contents = filter(
        lambda c: c is not None,
        (title, content, entry.get('summary'), entry.get('link')))
    message_text = "\r\n".join(contents)
    # BUG FIX: the original wrote
    #   fromtimestamp(mktime(message_date)).replace(tzinfo=UTC) or now()
    # The replace() result is always truthy, so the now() fallback was
    # unreachable — and an entry with no parsed date crashed inside
    # mktime(None) before the ``or`` was ever evaluated.
    raw_date = entry.get('published_parsed') or entry.get('updated_parsed')
    if raw_date is not None:
        message_date = datetime.fromtimestamp(
            mktime(raw_date)).replace(tzinfo=pytz.UTC)
    else:
        message_date = datetime.now()
    author = entry.get('author')
    return Message(text=message_text, date=message_date, username=author)
def collect_job(self, name):
    """Collect build status for the Jenkins job *name*.

    Returns False when the job has never been built, otherwise a dict
    with the last build's status, timestamp, console output and build
    numbers.
    """
    job = self.server.get_job_info(name=name)
    try:
        last_success_build_num = job['lastSuccessfulBuild']['number']
    except (KeyError, TypeError):
        # No successful build yet ('lastSuccessfulBuild' missing or None).
        # (Narrowed from ``except BaseException``, which also swallowed
        # KeyboardInterrupt/SystemExit.)
        last_success_build_num = None
    try:
        last_build_num = job['lastBuild']['number']
    except (KeyError, TypeError):
        # Job has never been built: nothing to collect.
        return False
    last_build_info = self.server.get_build_info(name, last_build_num)
    last_build_console = self.server.get_build_console_output(
        name, last_build_num)
    last_build_status = last_build_info['result']
    if not last_build_status:
        # Jenkins reports result=None while a build is in progress.
        last_build_status = "RUNNING"
    # Jenkins timestamps are epoch milliseconds; the original converted
    # via string slicing ('%d' -> '%d.%03d'), which is just /1000.0.
    seconds = last_build_info['timestamp'] / 1000.0
    last_build_time = datetime.strftime(datetime.fromtimestamp(seconds),
                                        "%Y-%m-%d %H:%M:%S")
    return {
        'app_name': name,
        'build_status': last_build_status,
        'last_build_time': last_build_time,
        'build_console_output': last_build_console,
        'last_success_build_num': last_success_build_num,
        'last_build_num': last_build_num
    }
def transform_data(self, data_integer):
    """Convert a Unix timestamp in seconds into a naive local datetime."""
    return datetime.fromtimestamp(data_integer)
def process_post(row, config, posts_update_run):
    """Sync one Notion table row into a Post (create or update).

    Tracks sync state on the post's companion Block record
    (``nt_block``): its ``updated_run`` marks this sync pass and
    ``updated_at`` mirrors Notion's last-edited time.  After saving the
    post, tags and child content blocks are synced; blocks not touched
    in this pass are deleted.  Notion timestamps are epoch milliseconds.
    """
    lgr.info('\n\nROW - Published: {}, Id: {}'.format(row.published, row.id))
    row_last_edited = make_aware(
        datetime.fromtimestamp(int(row.get('last_edited_time')) / 1000))
    lgr.info('row_last_edited: {}'.format(row_last_edited))
    # registering a change callback
    # row.add_callback(changed_callback)
    try:
        post = Post.objects.get(nt_block__reference=row.id)
        lgr.info('editing existing post')
        post.is_published = row.published
        post_nt_block = post.nt_block
        post_nt_block.updated_run = posts_update_run
        post_nt_block.updated_at = row_last_edited
        post_nt_block.save()
        # NOTE(review): updated_at was just assigned row_last_edited, so
        # this comparison is always true — the condition looks like a
        # leftover from a skip-unchanged check; confirm intent.
        if not row.published or (post_nt_block.updated_at == row_last_edited):
            post.save()
            # lgr.info('skipped unpublished or un-updated')
            # continue
    except Post.DoesNotExist:
        lgr.info('creating new post')
        post = Post()
        post.is_published = row.published
        post.site_id = 1
        post.creator_id = 1
        try:
            post_nt_block = Block.objects.get(reference=row.id)
        except Block.DoesNotExist:
            # No tracking record yet: create one bound to the Notion id.
            post_nt_block = Block()
            post_nt_block.reference = row.id
            post_nt_block.config = config
        post_nt_block.updated_at = row_last_edited
        post_nt_block.updated_run = posts_update_run
    lgr.info('saving post: {}'.format(post))
    post.save()
    lgr.info('saved post: {}'.format(post))
    lgr.info('saving post_nt_block: {}'.format(post_nt_block))
    post_nt_block.post = post
    post_nt_block.save()
    lgr.info('saved post_nt_block: {}'.format(post_nt_block))
    post.heading = row.name
    # post.sub_heading = row.
    # post.category = category
    post.save()
    # Sync tags (created on demand, fixed to site 1).
    for tag_name in row.tags:
        tag, created = Tag.objects.get_or_create(name=tag_name, site_id=1)
        post.tags.add(tag)
    # Retrieving Page content
    # Blocks
    children_update_run = timezone.now()
    level = 0
    for child in row.children:
        block = parse_block(child, config, children_update_run, level)
        level += 1
        post.body.add(block)
    # Remove body blocks not touched during this pass (i.e. deleted in
    # Notion).
    # deleted = BlogBlock.objects.filter(nt_block__updated_run__lt=children_update_run).delete()
    deleted = post.body.filter(
        nt_block__updated_run__lt=children_update_run).delete()
    lgr.info(deleted)
def parse_block(child, config, children_update_run, level):
    """Convert one Notion block *child* into a saved BlogBlock and return it.

    Dispatches on the Notion block class to set ``block.type`` and a
    per-type ``config`` dict.  Column-list/column blocks recurse into
    their children and save themselves early (``save = False`` skips the
    final save).  Sync state lives on the companion Block record
    (``nt_block``), keyed by the Notion block id.
    """
    save = True
    new_block_nt_block = False
    # Notion timestamps are epoch milliseconds.
    child_last_edited = make_aware(
        datetime.fromtimestamp(int(child.get('last_edited_time')) / 1000))
    try:
        block = BlogBlock.objects.get(nt_block__reference=child.id)
        lgr.info('parse_block existing')
        block_nt_block = block.nt_block
        block_nt_block.updated_run = children_update_run
        if block_nt_block.updated_at == child_last_edited:
            # Unchanged since last sync — early-exit intentionally
            # disabled (block is reprocessed anyway).
            # block.save()
            # continue
            # return None
            pass
    except BlogBlock.DoesNotExist:
        block = BlogBlock()
        new_block_nt_block = True
        lgr.info('parse_block creating new')
        try:
            block_nt_block = Block.objects.get(reference=child.id)
        except Block.DoesNotExist:
            block_nt_block = Block()
            block_nt_block.reference = child.id
            block_nt_block.config = config
        block_nt_block.updated_run = children_update_run
    block_nt_block.updated_at = child_last_edited
    block.level = level
    if isinstance(child, ColumnListBlock):
        block.type = BlogBlock.COLUMN_LIST_BLOCK
        # todo save config
        # Saved here because children must reference an existing row.
        save = False
        block.save()
        column_block_level = 0
        for column_block in child.children:
            lgr.info(column_block)
            cb = parse_block(column_block, config, children_update_run,
                             column_block_level)
            block.children.add(cb)
            column_block_level += 1
            column_block_child_level = 0
            for column_block_child in column_block.children:
                lgr.info(column_block_child)
                cbc = parse_block(column_block_child, config,
                                  children_update_run,
                                  column_block_child_level)
                cb.children.add(cbc)
                column_block_child_level += 1
    elif isinstance(child, ColumnBlock):
        block.type = BlogBlock.COLUMN_BLOCK
        # todo save config
        save = False
        block.save()
    elif isinstance(child, TextBlock):
        block.type = BlogBlock.TEXT
        block.config = {}
        block.config['content'] = child.title
    elif isinstance(child, DividerBlock):
        block.config = {}
        block.type = BlogBlock.DIVIDER
    elif isinstance(child, BulletedListBlock):
        block.type = BlogBlock.BULLETED_LIST
        block.config = {}
        block.config['content'] = child.title
    elif isinstance(child, NumberedListBlock):
        block.type = BlogBlock.NUMBERED_LIST_BLOCK
        block.config = {}
        block.config['content'] = child.title
    elif isinstance(child, ImageBlock):
        block.type = BlogBlock.IMAGE
        block.config = {}
        block.config['display_source'] = child.display_source
        block.config['file_id'] = child.file_id
        block.config['caption'] = child.caption
        block.config['full_width'] = child.full_width
        block.config['height'] = child.height
        block.config['page_width'] = child.page_width
        block.config['width'] = child.width
    elif isinstance(child, QuoteBlock):
        block.type = BlogBlock.QUOTE
        block.config = {}
        block.config['content'] = child.title
    elif isinstance(child, HeaderBlock):
        block.type = BlogBlock.HEADER
        block.config = {}
        block.config['content'] = child.title
    elif isinstance(child, SubheaderBlock):
        block.type = BlogBlock.SUB_HEADER
        block.config = {}
        block.config['content'] = child.title
    elif isinstance(child, SubsubheaderBlock):
        block.type = BlogBlock.SUB_SUB_HEADER
        block.config = {}
        block.config['content'] = child.title
    elif isinstance(child, CodeBlock):
        block.type = BlogBlock.CODE
        block.config = {}
        block.config['content'] = child.title
        block.config['language'] = child.language
        block.config['wrap'] = child.wrap
    elif isinstance(child, TodoBlock):
        block.type = BlogBlock.TODO
        block.config = {}
        block.config['content'] = child.title
        block.config['checked'] = child.checked
    else:
        # Unrecognised Notion block types are logged and saved as-is.
        lgr.info(child)
        lgr.info('Unknown block type: {}'.format(type(child)))
    if save:
        block.save()
    if new_block_nt_block:
        block_nt_block.block = block
        block_nt_block.save()
    return block
def get_feedly_articles():
    """Import all Feedly entries tagged 'WHN' as Posts, then mark the
    tag as read.

    Raises a plain Exception when the tag cannot be found, when it has
    no entries, or when an imported entry lacks a title.  Posts whose
    title already exists are skipped.  Feedly 'published' timestamps are
    epoch milliseconds.
    """
    from posts.models import FeedlyAPISettings, Post, NewsAggregator
    from django.contrib.auth import get_user_model
    from posts.utils import get_favicon
    feedly_settings = FeedlyAPISettings.get_solo()
    tag_on_feedly = 'WHN'
    feedly = feedly_settings.get_client()
    data = feedly.get_enterprise_user_tags(
        feedly_settings.FEEDLY_API_ACCESS_TOKEN)
    # Resolve the Feedly id of the 'WHN' tag.
    whn_tag_id = False
    for i in data:
        label = i.get('label', False)
        if label and label == tag_on_feedly:
            whn_tag_id = i.get('id', False)
            break
    if whn_tag_id:
        articles = feedly.get_feed_content(
            feedly_settings.FEEDLY_API_ACCESS_TOKEN, whn_tag_id)
        if articles.get('items', False):
            admin = get_user_model().objects.filter(is_superuser=True)[0]
            last_entry_id = ''
            for article in articles['items']:
                try:
                    # Skip entries whose title already exists as a Post.
                    post = Post.objects.get(title=article.get('title', ''))
                except Post.DoesNotExist:
                    origin_url = urlparse(article.get('origin').get('htmlUrl'))
                    if 'google' in origin_url.netloc:
                        # Google News proxies the origin: substitute the
                        # article's own URL and hostname-derived title.
                        article['origin']['htmlUrl'] = article.get('alternate')[0]['href'] if article.get('alternate', None) else article.get('canonicalUrl')
                        article['origin']['title'] = urlparse(article['origin']['htmlUrl']).netloc
                    try:
                        news_aggregator = NewsAggregator.objects.get(
                            name=article.get('origin').get('title'))
                    except NewsAggregator.DoesNotExist:
                        # First article from this source: create the
                        # aggregator and try its favicon as the logo.
                        na_url = urlparse(article['origin']['htmlUrl'])
                        news_aggregator = NewsAggregator.objects.create(
                            name=article.get('origin').get('title'),
                            url=na_url.scheme + '://' + na_url.netloc)
                        temp_image = get_favicon(
                            article.get('origin').get('htmlUrl'))
                        if temp_image:
                            try:
                                news_aggregator.logo.save(
                                    na_url.netloc + "_logo", File(temp_image))
                            except OSError:
                                logging.warning("Can't save logo image for news_aggregator: " + str(news_aggregator.id) + '--' + str(news_aggregator.name))
                        news_aggregator.save()
                    if article.get('title', False) and news_aggregator and article.get('unread', False):
                        post = Post.objects.create(
                            submitter=admin,
                            title=article.get('title'),
                            news_aggregator=news_aggregator,
                            submit_time=datetime.fromtimestamp(
                                article.get('published')/1000.0),
                            url=article['canonicalUrl'] if article.get('canonicalUrl', None) else article.get('alternate')[0]['href'],
                            feedly_engagement=article.get('engagement', 100))
                    else:
                        raise Exception("Article doesn't have title.")
                    article_image = article.get('visual', False)
                    if article_image:
                        post.image_url = article_image.get('url', None)
                        post.save()
                    last_entry_id = article.get('id')
            # Marking the tag read stops these entries being re-imported.
            feedly.mark_tag_read(feedly_settings.FEEDLY_API_ACCESS_TOKEN,
                                 whn_tag_id, last_entry_id)
        else:
            raise Exception("No entries are found with '%s' tag." % tag_on_feedly)
    else:
        raise Exception("Can't find '%s' tag!" % tag_on_feedly)
except IndexError: print(f"Failed to get rfid for {email}! Skipping") skipped_members.append(email) continue full_name_list = user[3].split(" ") if full_name_list: first_name = full_name_list.pop(0) last_name = " ".join(full_name_list) else: print(f"Skipping {email} because no name was given!") skipped_members.append(email) continue # Get the correct joined and expired dates, and add them to the member date_joined = datetime.fromtimestamp(user[6]) date_expires = datetime.fromtimestamp(user[7]) # Select which group the member should be placed in mem_type = user[4].lower() status = user[5].lower() if mem_type == "member": if status == "new": group = groups["just joined"] elif status == "expired": group = groups["expired"] elif status == "active" and date_expires < datetime.now(): group = groups["expired"] elif status == "active" and date_expires > datetime.now(): group = groups["member"] elif (mem_type == "staff"
def post(self, request, *args, **kwargs):
    """Create an Order from the cart and the 'client_data' cookie, then
    route the user to the chosen payment flow (cash on delivery, Payme
    or Uzcard).  Any other case redirects home.
    """
    if request.POST.get('is_submitted'):
        cart_items = get_cart_items(self.request)
        total_weight = get_total_weight(cart_items=cart_items)
        # Checkout details (address, payment method, delivery time) are
        # carried in the 'client_data' cookie.
        client_data_cookie = self.request.COOKIES.get('client_data')
        client_data = cookie_parser(client_data_cookie)
        address = client_data.get('address')
        order = Order()
        order.client_name = address.get('client_name')
        order.payment = PaymentMethod.objects.get(
            method__exact=client_data.get('payment_method'))
        order.phone = address.get('phone')
        order.shipping_address = address.get('shipping_address')
        order.total_weight = total_weight
        order.delivery_price = get_delivery_price(cart_items)
        try:
            order.need_porter = int(client_data.get('need_porter'))
        except ValueError:
            # Missing/non-numeric flag: leave the model default.
            pass
        # NOTE(review): callable is_authenticated() is pre-Django-1.10
        # style; on 1.10+ this should be the property.
        if request.user.is_authenticated():
            order.customer = request.user
        if order.need_porter:
            # Porter work is billed at the same rate as delivery.
            order.porter_work_price = get_delivery_price(cart_items)
        # 'time' is an epoch timestamp; localise to the site timezone.
        order.shipping_time = datetime.fromtimestamp(
            client_data.get('time'), tz=pytz.timezone(settings.TIME_ZONE))
        order.save()
        # Snapshot the cart into the order as JSON and total it up.
        total_price = 0
        json_data = []
        for cart_item in cart_items:
            cart_item.order = order
            total_price += cart_item.total_price
            cart_item.save()
            title = "{} {}".format(cart_item.variation.product.name,
                                   cart_item.variation.name)
            if cart_item.variation.color:
                title = "{} {}".format(title, cart_item.variation.color.name)
            json_data.append({
                'title': title,
                'price': float(cart_item.variation.price),
                'count': cart_item.count,
                'total_price': float(cart_item.total_price)
            })
        order.products = json_data
        order.products_price = Decimal(total_price)
        order.order_unique_id = uuid.uuid4()
        order.total_price = order.products_price + get_delivery_price(cart_items)
        order.save()
        if order.payment.method == PaymentMethod.CASH_ON_DELIVERY:
            return redirect(reverse_lazy('orders:order_detail',
                                         args=[order.phone, order.order_unique_id]))
        elif order.payment.method == PaymentMethod.PAYME:
            # Payme checkout URL format:
            # https://checkout.paycom.uz/base64(m=587f72c72cac0d162c722ae2;ac.order_id=197;a=500)
            merchant_id = settings.PAYME_MERCHANT_ID
            ac_order_id = order.id
            ac_customer_id = order.phone
            # Payme amounts are in tiyin (1/100 of a soum).
            amount = int(order.total_price.real) * 100
            redirect_url_after_operation = reverse_lazy('main:home')
            # NOTE(review): ct/cr appear unused (and 'c=' below carries
            # the redirect URL, not cr) — confirm against the Payme spec.
            ct = 15
            cr = 860
            data = "m={};ac.order_id={};ac.customer_id={};a={};c={}".format(
                merchant_id, ac_order_id, ac_customer_id, amount,
                redirect_url_after_operation)
            base64_data = base64.b64encode(data.encode('utf-8')).decode('utf-8')
            base64_data_url = "https://checkout.paycom.uz/{}".format(base64_data)
            return redirect(to=base64_data_url)
        elif order.payment.method == PaymentMethod.UZCARD:
            return redirect(reverse('orders:payment_uzcard',
                                    args=[order.phone, order.order_unique_id]))
        else:
            return redirect(reverse('main:home'))
    return redirect(reverse('main:home'))
def main():
    """Read the 'bildeliste2' file listing (``ls``-style columns) and
    bucket every image by (year, issue), falling back to the file's
    mtime year and issue '00' when the path yields nothing.  Duplicate
    filenames are counted but not stored.
    """
    with open('bildeliste2') as listing:
        images = listing.readlines()
    # matchvalue = None
    # shuffle(images)
    # images = images[:100]
    imagedict = {}
    duplicates = 0
    # Most-specific pattern first: year/issue with the issue repeated,
    # then year/issue, then year + bare two-digit issue.
    path_patterns = (
        r'\./(?P<year>\d{4})/(?P<issue>\d{1,2})/(?P=issue)\D',
        r'\./(?P<year>\d{4})/(?P<issue>\d{1,2})/',
        r'\./(?P<year>\d{4})/(?P<issue>\d{2})[^\d/]',
    )
    for img in images:
        # Skip ads, banners and other non-editorial directories.
        if re.search('/(reklame|banner|elm|2014/2013|slettmeg)/', img):
            continue
        fields = img.split()
        filesize_in_bytes = int(fields[0])
        unix_timestamp = int(fields[5])
        filedate = datetime.fromtimestamp(unix_timestamp)
        filepath = ''.join(fields[6:])
        filename = filepath.split('/')[-1]
        # logger.debug("{} {} {}".format(size, unix_timestamp, path))
        year = issue = None
        year_issue_match = None
        for pattern in path_patterns:
            year_issue_match = re.match(pattern, filepath)
            if year_issue_match:
                break
        if year_issue_match:
            # matchvalue = 1
            year = year_issue_match.group('year')
            issue = year_issue_match.group('issue')
        else:
            issue_match = re.search(r'\/(?P<issue>\d{2})[^/\d]', filepath)
            if issue_match:
                # matchvalue = 2
                issue = issue_match.group('issue')
                year = filedate.year
            else:
                # matchvalue = 3
                year = filedate.year
                issue = '00'
        if year and issue:
            image_file = ImageFile(
                filepath, filesize_in_bytes, filedate, issue, year)
            if filename in imagedict:
                # old_filepath = imagedict[filename]
                duplicates += 1
            else:
                imagedict[filename] = image_file
        else:
            msg = 'No match {}'.format(filepath)
            logger.debug(msg)
    msg = 'all: {0} placed: {1} duplicates: {2}'.format(
        len(images), len(imagedict), duplicates)
    logger.debug(msg)
def parse(self, response):
    """Parse the epidemic dashboard page: extract the embedded JSON from
    the page's <script> tags, yield one statistics item plus follow-up
    requests for per-province and per-country detail data.

    Skips the whole run when another spider instance owns the cache lock
    or when the remote ``modifyTime`` has not advanced since the last
    stored record.  ``self.crawled`` is 0 when nothing was fetched,
    1 when data was scraped.
    """
    object_id = self.object_id
    spider_id = cache.get('running_spider_id')
    if object_id != spider_id:
        # Another (newer) spider instance owns the run; abort this one.
        logger.info('Spider is running.')
        self.crawled = 0
        return
    sel = Selector(response)
    scripts = sel.xpath('//script')
    # Decide whether the scraped data actually needs saving.
    statistics = self.get_dict(scripts, '#getStatisticsService')
    # Source timestamps are epoch milliseconds.
    create_time = make_aware(
        datetime.fromtimestamp(statistics['createTime'] / 1000.0))
    modify_time = make_aware(
        datetime.fromtimestamp(statistics['modifyTime'] / 1000.0))
    qs = items.StatisticsItem.django_model.objects.all().order_by('-id')
    if qs.count() > 1 and qs[0].modifyTime == modify_time:
        logger.info('Data does not change.')
        self.crawled = 0
        return
    # Summary statistics.
    statistics = self.explain_statistics(statistics)
    statistics['createTime'] = create_time
    statistics['modifyTime'] = modify_time
    # Domestic (per-province) data: follow each province's detail URL.
    provinces = self.get_list(scripts, '#getAreaStat')
    for province in provinces:
        cities = province.pop('cities', [])
        province.pop('locationId')
        yield scrapy.Request(province['statisticsData'],
                             callback=self.parse_province_statistics_data,
                             meta={
                                 'province': province,
                                 'cities': cities
                             })
    # Timeline events; id "getTimelineService2" holds the English content.
    timelines = self.get_list(scripts, '#getTimelineService1')
    result = []
    for item in timelines:
        timeline = {}
        for key in ('title', 'summary', 'infoSource', 'sourceUrl',
                    'pubDate', 'pubDateStr'):
            timeline[key] = item.get(key)
        result.append(timeline)
    statistics['timelines'] = json.dumps(result)
    # Recommendations; id "#getIndexRecommendList2" holds the English content.
    recommends = self.get_list(scripts, '#getIndexRecommendListundefined')
    result = []
    for item in recommends:
        recommend = {}
        for key in ('title', 'linkUrl', 'imgUrl', 'countryType',
                    'contentType', 'recordStatus', 'sort'):
            recommend[key] = item.get(key)
        result.append(recommend)
    statistics['recommends'] = json.dumps(result)
    # WHO article.
    item = self.get_dict(scripts, '#fetchWHOArticle')
    article = {}
    for key in ('title', 'linkUrl', 'imgUrl'):
        article[key] = item.get(key)
    statistics['WHOArticle'] = json.dumps(article)
    # Wiki entries.
    wiki_result = self.get_dict(scripts, '#getWikiList')
    wikis = wiki_result['result']
    result = []
    for item in wikis:
        wiki = {}
        for key in ('title', 'linkUrl', 'imgUrl', 'description'):
            wiki[key] = item.get(key)
        result.append(wiki)
    statistics['wikis'] = json.dumps(result)
    # Shopping guides.
    guides = self.get_list(scripts, '#fetchGoodsGuide')
    result = []
    for item in guides:
        guide = {}
        for key in ('categoryName', 'title', 'recordStatus',
                    'contentImgUrls'):
            guide[key] = item.get(key)
        result.append(guide)
    statistics['goodsGuides'] = json.dumps(result)
    # Rumor-debunking and protection info.
    rumors = self.get_list(scripts, '#getIndexRumorList')
    result = []
    for item in rumors:
        rumor = {}
        for key in ('title', 'mainSummary', 'summary', 'body',
                    'sourceUrl', 'score', 'rumorType'):
            rumor[key] = item.get(key)
        result.append(rumor)
    statistics['rumors'] = json.dumps(result)
    yield statistics
    # International data: the source reuses the province fields for
    # countries, so rename/strip them before yielding.
    countries = self.get_list(scripts, '#getListByCountryTypeService2true')
    for country in countries:
        country.pop('id', None)
        country['countryName'] = country.pop('provinceName', None)
        country['provinceName'] = ''
        country.pop('countryType')
        country.pop('cityName')
        country.pop('provinceId')
        country.pop('provinceName')
        country.pop('provinceShortName')
        country.pop('modifyTime', None)
        country.pop('createTime', None)
        country['incrVo'] = json.dumps(country.get('incrVo', {}))
        statistics_data = country.get('statisticsData')
        if statistics_data:
            yield scrapy.Request(
                statistics_data,
                callback=self.parse_country_statistics_data,
                meta={'country': country})
        else:
            yield items.CountryItem(dailyData=[], **country)
    self.crawled = 1  # mark that the spider has fetched data
def gen_date(min_date=None, max_date=None):
    """Yield an endless stream of random dates uniformly distributed
    between *min_date* and *max_date*.

    BUG FIX: the original signature used ``today_add_days(-365)`` /
    ``today_add_days(365)`` directly as defaults, so they were evaluated
    exactly once at import time and the one-year window never moved for
    the lifetime of the process.  Defaults are now resolved per call.
    """
    if min_date is None:
        min_date = today_add_days(-365)
    if max_date is None:
        max_date = today_add_days(365)
    min_ts = time.mktime(min_date.timetuple())
    max_ts = time.mktime(max_date.timetuple())
    for __ in itertools.count():
        random_time = min_ts + random.random() * (max_ts - min_ts)
        # mktime(localtime(x)) round-trip truncates to whole seconds.
        yield tz_datetime.fromtimestamp(
            time.mktime(time.localtime(random_time))).date()
def inbound_mail(request):
    """Parse an inbound Mailgun event (https://documentation.mailgun.com/
    quickstart-receiving.html) and save the data as models.

    Verifies the Mailgun signature, rejects senders outside the allowed
    domains, stores the message as an InboundEmail with its recipients
    and attachments, then triggers the classification prompt task.
    """
    if not verify_mailgun_token(request.POST.get('token'),
                                request.POST.get('timestamp'),
                                request.POST.get('signature')):
        return HttpResponseForbidden('Failed Mailgun token validation.')
    required_fields = (
        'sender',
        'To',
        'Date',
    )
    missing_fields = [x for x in required_fields if x not in request.POST]
    if missing_fields:
        return HttpResponseBadRequest('Missing a requied field.')
    # Parse the sender and recipient address fields.
    sender_address = parseaddr(request.POST.get('sender'))
    recipient_addresses = getaddresses(request.POST.get('To').split(','))
    # Only senders from the configured domains may use FOIAtracker.
    if sender_address[1].split('@')[1] not in \
            settings.FOIATRACKER_ALLOWED_DOMAINS:
        msg = '"%s" is not authorized to use FOIAtracker.' % sender_address[1]
        return HttpResponseForbidden(msg)
    # Make a timezone-aware datetime from the Date header; fall back to
    # "now" when the header cannot be parsed.
    date_field = request.POST.get('Date')
    parsed_from_email = parsedate_tz(date_field)
    if parsed_from_email is not None:
        parsed_timestamp = mktime_tz(parsed_from_email)
        parsed_datetime = datetime.fromtimestamp(parsed_timestamp)
    else:
        parsed_datetime = datetime.now()
    sent = tz_aware_date(parsed_datetime)
    # Create a model for the email.
    email = InboundEmail.objects.create(
        raw=request.POST.get('body-plain', ''),
        text=request.POST.get('stripped-text', ''),
        html=request.POST.get('body-html', ''),
        sent=sent,
        sender=get_model_by_email(Sender, sender_address[1]),
        subject=request.POST.get('subject', ''))
    # Set up M2M relationships for email recipients, skipping internal
    # addresses.
    for address in recipient_addresses:
        # For addresses that aren't e-mails, like 'undisclosed-recipients'
        if '@' not in address[1]:
            continue
        if address[1].split('@')[1] in settings.FOIATRACKER_ALLOWED_DOMAINS:
            continue
        recipient = get_model_by_email(Recipient, address[1])
        email.recipients.add(recipient)
    email.save()
    # Send a message to let the user know we're ready to classify.
    tasks.email_prompt(email.pk)
    # Save file attachments to S3 and attach them to the message.
    # Mailgun names them attachment-1 .. attachment-N.
    attachment_count = request.POST.get('attachment-count')
    if attachment_count is not None:
        try:
            num_attachments = int(attachment_count)
            for attachment_num in range(1, num_attachments + 1):
                attachment_key = 'attachment-%s' % attachment_num
                attached_file = request.FILES.get(attachment_key)
                if attached_file is None:
                    continue
                EmailAttachment.objects.create(
                    email=email,
                    stored_file=attached_file,
                    content_type=attached_file.content_type,
                    size=attached_file.size)
        except ValueError:
            # Non-numeric attachment-count: ignore attachments entirely.
            pass
    return HttpResponse()
def get_sample(self):
    """Draw one UTC-tagged random datetime uniformly from the
    ``[self.min_date, self.max_date]`` timestamp range (whole-second
    resolution via the mktime/localtime round-trip).
    """
    span = self.max_date - self.min_date
    random_ts = self.min_date + random.random() * span
    sample = tz_datetime.fromtimestamp(
        time.mktime(time.localtime(random_ts)))
    return sample.replace(tzinfo=utc)