def set_options(self, *args, **kwargs):
    """
    Configure paths, remote metadata, the user prompt and the progress bar.

    Sets ``url``, ``data_dir``, ``zip_path``, ``tsv_dir``, ``csv_dir``,
    ``metadata``, ``prompt`` and ``pbar`` on the instance. The CSV
    directory is created (including missing parents) if it does not exist.
    """
    # Check for the user-defined data dir
    # otherwise put the data in the data dir under the project root
    data_dir = getattr(
        settings,
        'CALACCESS_DOWNLOAD_DIR',
        os.path.join(settings.BASE_DIR, 'data')
    )
    self.url = 'http://campaignfinance.cdn.sos.ca.gov/dbwebexport.zip'
    self.data_dir = data_dir
    self.zip_path = os.path.join(self.data_dir, 'calaccess.zip')
    self.tsv_dir = os.path.join(self.data_dir, "tsv/")
    self.csv_dir = os.path.join(self.data_dir, "csv/")

    # makedirs (rather than mkdir) so a missing data dir does not raise
    if not os.path.exists(self.csv_dir):
        os.makedirs(self.csv_dir)

    self.metadata = self.get_metadata()
    last_modified = self.metadata['last-modified']
    content_length = self.metadata['content-length']

    self.prompt = PROMPT % (
        dateformat(last_modified, 'N j, Y'),
        dateformat(last_modified, 'P'),
        naturaltime(last_modified),
        size(content_length),
        self.data_dir,
    )
    self.pbar = progressbar.ProgressBar(
        widgets=[
            progressbar.Percentage(),
            progressbar.Bar(),
            ' ',
            progressbar.ETA(),
            ' ',
            progressbar.FileTransferSpeed()
        ],
        maxval=content_length
    )
def set_kwargs(self, obj):
    """
    Run the inherited ``set_kwargs`` and then add the year, month, day and
    time parts of ``obj.release_datetime`` to ``self.kwargs``.
    """
    super(VersionDetail, self).set_kwargs(obj)
    released = obj.release_datetime
    date_parts = dict(
        year=released.year,
        month=dateformat(released, 'm'),
        day=dateformat(released, 'd'),
        time=dateformat(released, 'His'),
    )
    self.kwargs.update(date_parts)
def get_url(self, obj):
    """
    Reverse the 'version_detail' URL from the parts of
    ``obj.release_datetime``.
    """
    released = obj.release_datetime
    url_kwargs = {
        'year': released.year,
        'month': dateformat(released, 'm'),
        'day': dateformat(released, 'd'),
        'time': dateformat(released, 'His'),
    }
    return reverse('version_detail', kwargs=url_kwargs)
def get_url(self, obj):
    """
    Build the 'version_detail' URL for ``obj`` using the year, month, day
    and time of its ``release_datetime``.
    """
    when = obj.release_datetime
    parts = {'year': when.year}
    parts['month'] = dateformat(when, 'm')
    parts['day'] = dateformat(when, 'd')
    parts['time'] = dateformat(when, 'His')
    return reverse('version_detail', kwargs=parts)
def get_apa_citation(self):
    """
    Return the APA style citation for this screenshot.
    """
    headline = "%s homepage at %s" % (
        self.site.name,
        dateformat(self.timestamp, 'N j, Y, P e'),
    )
    created = dateformat(self.timestamp, 'Y, N j')
    link = "http://www.pastpages.org%s" % self.get_absolute_url()
    template = '%(title)s. (%(creation_date)s). <em>PastPages</em>. Retrieved from %(url)s'
    return template % {
        'title': headline,
        'creation_date': created,
        'url': link,
    }
def get_chicago_citation(self):
    """
    Return the Chicago style citation for this screenshot.
    """
    headline = "%s homepage at %s" % (
        self.site.name,
        dateformat(self.timestamp, 'N j, Y, P e'),
    )
    created = dateformat(self.timestamp, 'F j, Y')
    link = "http://www.pastpages.org%s" % self.get_absolute_url()
    template = '"%(title)s." PastPages. Last modified %(creation_date)s, %(url)s.'
    return template % {
        'title': headline,
        'creation_date': created,
        'url': link,
    }
def get_chicago_citation(self):
    """
    Format this screenshot as a Chicago style citation string.
    """
    values = {}
    values['title'] = "%s homepage at %s" % (
        self.site.name,
        dateformat(self.timestamp, 'N j, Y, P e'),
    )
    values['creation_date'] = dateformat(self.timestamp, 'F j, Y')
    values['url'] = "http://www.pastpages.org%s" % self.get_absolute_url()
    return '"%(title)s." PastPages. Last modified %(creation_date)s, %(url)s.' % values
def get_apa_citation(self):
    """
    Format this screenshot as an APA style citation string.
    """
    values = {}
    values['title'] = "%s homepage at %s" % (
        self.site.name,
        dateformat(self.timestamp, 'N j, Y, P e'),
    )
    values['creation_date'] = dateformat(self.timestamp, 'Y, N j')
    values['url'] = "http://www.pastpages.org%s" % self.get_absolute_url()
    return '%(title)s. (%(creation_date)s). <em>PastPages</em>. Retrieved from %(url)s' % values
def set_kwargs(self, obj):
    """
    Run the inherited ``set_kwargs`` and then add the year, month, day and
    time parts of ``obj.raw_version.release_datetime`` to ``self.kwargs``.
    """
    super(VersionDetail, self).set_kwargs(obj)
    released = obj.raw_version.release_datetime
    date_parts = dict(
        year=released.year,
        month=dateformat(released, 'm'),
        day=dateformat(released, 'd'),
        time=dateformat(released, 'His'),
    )
    self.kwargs.update(date_parts)
def get_mla_citation(self):
    """
    The proper way to cite a screenshot in MLA style.

    Returns the citation as a string containing HTML markup. The access
    date is the current local time when the method is called.
    """
    style = '"%(title)s." <em>PastPages</em>. %(creation_date)s. Web. %(today)s. <%(url)s>'
    data = dict(
        title="%s homepage at %s" % (
            self.site.name,
            dateformat(self.timestamp, 'N j, Y, P e'),
        ),
        creation_date=dateformat(self.timestamp, 'j N Y'),
        # datetime.now() is already "now"; chaining .today() on it only
        # built a second, equivalent datetime.
        today=dateformat(datetime.now(), 'j N Y'),
        url="http://www.pastpages.org%s" % self.get_absolute_url(),
    )
    return style % data
def get_mla_citation(self):
    """
    The proper way to cite a screenshot in MLA style.

    Returns the citation as a string containing HTML markup. The access
    date is the current local time when the method is called.
    """
    style = '"%(title)s." <em>PastPages</em>. %(creation_date)s. Web. %(today)s. <%(url)s>'
    data = dict(
        title="%s homepage at %s" % (
            self.site.name,
            dateformat(self.timestamp, 'N j, Y, P e'),
        ),
        creation_date=dateformat(self.timestamp, 'j N Y'),
        # datetime.now() is already "now"; chaining .today() on it only
        # built a second, equivalent datetime.
        today=dateformat(datetime.now(), 'j N Y'),
        url="http://www.pastpages.org%s" % self.get_absolute_url(),
    )
    return style % data
def get_context_data(self, **kwargs):
    """
    Add some extra bits to the template's context

    On top of the inherited context this sets the release date string, a
    description, the processed-version flags, the zip and flat file
    entries and the raw version's error percentage.
    """
    context = super(VersionDetail, self).get_context_data(**kwargs)
    raw_version = self.object.raw_version

    context['date_string'] = dateformat(raw_version.release_datetime, "N j, Y")
    # Adjacent literals instead of a backslash continuation inside the
    # string, which leaked the source indentation into the description.
    context['description'] = (
        "The {} release of CAL-ACCESS database, the government database "
        "that tracks campaign finance and lobbying activity in California "
        "politics."
    ).format(context['date_string'])

    # Both flags are hard-coded to True, so the processed-version entries
    # are always added.
    context['has_processed_version'] = True
    context['processed_version_completed'] = True
    context['flat_zip'] = self.object.flat_zip
    context['relational_zip'] = self.object.relational_zip
    context['flat_files'] = self.get_flat_files()

    if raw_version.error_count:
        context['raw_files_w_errors'] = self.get_raw_files_w_errors()
        context['error_pct'] = (
            100 * raw_version.error_count /
            float(raw_version.download_record_count)
        )
    else:
        context['error_pct'] = 0
    return context
def submit(request):
    """
    View for submitting a URL

    Raises Http404 for unauthenticated users. Answers with a JSON payload
    describing the link when the submitted form validates, otherwise with
    an empty 400 response.
    """
    if not request.user.is_authenticated():
        # TODO redirect to an error page
        raise Http404

    if request.GET:
        form = LinkSubmitForm(request.GET)
    elif request.POST:
        form = LinkSubmitForm(request.POST)
    else:
        form = None

    if not (form and form.is_valid()):
        return HttpResponse(status=400)

    submitted_url = form.cleaned_data['u']
    link, created = Link.objects.get_or_create(url=submitted_url)
    link_info = {
        "short_url": link.short_url(),
        "url": link.url,
        "date_submitted": dateformat(link.date_submitted, "N j, Y"),
        "usage_count": link.usage_count,
    }
    payload = simplejson.dumps({"link": link_info, "created": created})
    return HttpResponse(payload, content_type='text/javascript')
def item_title(self, item):
    """
    Title for a feed item: the item's site plus its localized timestamp.
    """
    site = item.site
    local_stamp = timezone.localtime(item.timestamp)
    taken_at = dateformat(local_stamp, 'l N j, Y, P e')
    return u'Screenshots of %s taken at %s' % (site, taken_at)
def timeago(d, format='M d, Y f a'):
    """
    Describe ``d`` relative to now.

    When at least one whole day separates now from ``d`` (with its
    microseconds dropped), ``d`` is rendered with ``format``; otherwise
    the result is "<timesince> ago".
    """
    current = localtime(datetime.now())
    truncated = d - timedelta(microseconds=d.microsecond)
    age = current - truncated
    if age.days <= 0:
        return '%s ago' % timesince(d)
    return dateformat(d, format)
def item_title(self, item):
    """
    Build the item title from its site and its localized timestamp.
    """
    site = item.site
    when = dateformat(timezone.localtime(item.timestamp), 'l N j, Y, P e')
    return u'Screenshots of %s taken at %s' % (site, when)
def get_wikipedia_citation(self):
    """
    The proper way to cite a screenshot in Wikipedia markup.

    Returns the "cite web" template as a string with ``<br>`` separators.
    The access date is the current local time when the method is called.
    """
    style = (
        "{{cite web<br> "
        "| url = %(url)s<br> "
        "| title = %(title)s<br> "
        "| publisher = PastPages<br> "
        "| date = %(creation_date)s<br> "
        "| accessdate = %(today)s<br> "
        "| ref = {{harvid|PastPages-%(id)s|%(year)s}}<br> "
        "}}"
    )
    data = {
        'title': "%s homepage at %s" % (
            self.site.name,
            dateformat(self.timestamp, 'N j, Y, P e'),
        ),
        'creation_date': dateformat(self.timestamp, 'N j, Y'),
        # datetime.now() is already "now"; chaining .today() on it only
        # built a second, equivalent datetime.
        'today': dateformat(datetime.now(), 'N j, Y'),
        'url': "http://www.pastpages.org%s" % self.get_absolute_url(),
        'year': dateformat(self.timestamp, 'Y'),
        'id': str(self.id),
    }
    return style % data
def get_wikipedia_citation(self):
    """
    The proper way to cite a screenshot in Wikipedia markup.

    Returns the "cite web" template as a string with ``<br>`` separators.
    The access date is the current local time when the method is called.
    """
    style = (
        "{{cite web<br> "
        "| url = %(url)s<br> "
        "| title = %(title)s<br> "
        "| publisher = PastPages<br> "
        "| date = %(creation_date)s<br> "
        "| accessdate = %(today)s<br> "
        "| ref = {{harvid|PastPages-%(id)s|%(year)s}}<br> "
        "}}"
    )
    data = {
        'title': "%s homepage at %s" % (
            self.site.name,
            dateformat(self.timestamp, 'N j, Y, P e'),
        ),
        'creation_date': dateformat(self.timestamp, 'N j, Y'),
        # datetime.now() is already "now"; chaining .today() on it only
        # built a second, equivalent datetime.
        'today': dateformat(datetime.now(), 'N j, Y'),
        'url': "http://www.pastpages.org%s" % self.get_absolute_url(),
        'year': dateformat(self.timestamp, 'Y'),
        'id': str(self.id),
    }
    return style % data
def randomify_by_date(seq, format_string='Ymd'):
    """
    Shuffle the given sequence with a seed derived from the current date
    and time.

    The format string controls how often the ordering changes, e.g.:

        Ymd (the default) changes every day
        Ymdhi changes every minute
        Y changes every year
    """
    current = timezone.now()
    date_seed = dateformat(current, format_string)
    return randomify(seq, date_seed)
def ia_metadata(self):
    """
    Return the metadata dictionary describing this screenshot, built from
    the screenshot itself, its site and its update.
    """
    site = self.site
    title = '{} at {}'.format(
        site.name,
        dateformat(self.timestamp, 'N j, Y, P'),
    )
    return {
        'collection': "pastpages",
        'title': title,
        'mediatype': 'image',
        'contributor': "pastpages.org",
        'creator': "pastpages.org",
        'publisher': site.name,
        'date': str(self.timestamp),
        'subject': ["news", "homepages", "screenshot"],
        'pastpages_id': self.id,
        'pastpages_url': self.get_absolute_url(),
        'pastpages_timestamp': str(self.timestamp),
        'pastpages_site_id': site.id,
        'pastpages_site_slug': site.slug,
        'pastpages_site_name': site.name,
        'pastpages_update_id': self.update.id,
    }
def ajax_checkusername_and_smscode(request):
    """
    Check that the posted username is unused and send it an SMS code.

    Responds with a JSON object whose ``status`` is "N" when the username
    already exists, "C" when six or more codes were already requested
    today, "E" when sending the SMS failed, and "Y" on success.
    """
    # Currently verified against MySQL; caching in redis would be better
    def reply(flag):
        return HttpResponse(json.dumps({'status': flag}),
                            content_type="application/json")

    status = 'Y'
    username = request.POST.get('username', '')
    ali_customer_id = request.POST.get('ali_customer_id', '')

    # Check whether the username already exists
    if Customer.objects.filter(username=username).exists():
        return reply("N")

    obj, _created = CoreVerifyAli.objects.get_or_create(
        user_id=ali_customer_id)
    last_updated = dateformat(obj.updated, "Y-m-d")
    attempts = obj.user_count
    today = datetime.datetime.now().strftime("%Y-%m-%d")

    # Number of verification codes requested today
    if today != last_updated:
        attempts = 0
    elif attempts >= 6:
        return reply("C")

    # Send the SMS
    code, msg = sms_code.sms(username)
    if code == -1:
        return reply('E')

    obj.phone = username
    obj.code = msg
    obj.expire_time = int(time.time())
    obj.user_count = attempts + 1
    obj.save()
    return reply(status)
def ajax_unsubscribe_list(request, list_id):
    """
    Return one page of a list's unsubscribe rows as DataTables JSON.

    Ordering, search text and paging (``start`` / ``length``) are read
    from ``request.GET``. Rows are selected from the
    ``ml_unsubscribe_<customer_id>`` table on the 'mm-pool' connection.
    The delete link is only rendered when the requesting user owns the
    list and the list allows export.
    """
    data = request.GET
    order_column = data.get('order[0][column]', '')
    order_dir = data.get('order[0][dir]', '')
    search = data.get('search[value]', '')
    colums = ['list_id', 'address', 'datetime']

    obj = model_addresses.get_address_obj(request, list_id)
    user_id = obj.customer_id
    if request.user.id == user_id:
        is_modify_flag = obj.is_allow_export
    else:
        is_modify_flag = False

    # Request-supplied values are bound as query parameters instead of
    # being formatted into the SQL text (SQL injection).
    where_str = u'list_id = %s'
    params = [list_id]
    if search:
        where_str += u' AND address LIKE %s'
        params.append(u'%{0}%'.format(search))

    # Only whitelisted column names ever reach the ORDER BY clause.
    order_by_str = u''
    try:
        column_index = int(order_column)
    except (TypeError, ValueError):
        column_index = None
    if column_index is not None and 0 <= column_index < len(colums):
        direction = u'desc' if order_dir == 'desc' else u'asc'
        order_by_str = u'order by %s %s' % (colums[column_index], direction)

    try:
        length = int(data.get('length', 1))
    except (TypeError, ValueError):
        length = 1
    if length < 1:
        # zero/negative page sizes would break LIMIT and the row numbering
        length = 1
    try:
        start_num = int(data.get('start', '0'))
    except (TypeError, ValueError):
        start_num = 0
    if start_num < 0:
        start_num = 0

    tablename = 'ml_unsubscribe_' + str(user_id)
    count_sql = u"SELECT COUNT(1) FROM {0} WHERE {1};".format(
        tablename, where_str)

    cr = connections['mm-pool'].cursor()
    try:
        cr.execute(count_sql, params)
        count = cr.fetchall()[0][0]
        if start_num >= count:
            start_num = 0

        # length and start_num are validated ints, safe to format in
        limit_str = u'limit %d offset %d' % (length, start_num)
        sql = u"SELECT address, datetime, list_id FROM {0} WHERE {1} {2} {3}".format(
            tablename, where_str, order_by_str, limit_str)
        cr.execute(sql, params)
        rows = cr.fetchall()
    finally:
        cr.close()

    rs = {
        "sEcho": 0,
        "iTotalRecords": count,
        "iTotalDisplayRecords": count,
        "aaData": []
    }
    # Floor division keeps the row number an integer on Python 3 as well.
    number = length * (start_num // length) + 1
    for r in rows:
        if is_modify_flag:
            # NOTE(review): r[0] (the address) is placed into the markup
            # unescaped - confirm it cannot contain quotes or HTML.
            modify_str = u'''<a type="button" class="btn btn-outline btn-danger btn-xs" href="Javascript: SetStatus({}, '{}', '-2')">{}</a>'''.format(
                r[2], r[0], _(u'删除'))
        else:
            modify_str = ""
        rs["aaData"].append([
            number,
            r[0],
            dateformat(r[1], 'Y-m-d H:i:s'),
            modify_str,
            "",
        ])
        number += 1
    return HttpResponse(json.dumps(rs, ensure_ascii=False),
                        content_type="application/json")
def __unicode__(self):
    """
    Render the object as "<start_date> - <end_date>".
    """
    begins = dateformat(self.start_date, "Y-m-d")
    ends = dateformat(self.end_date, "Y-m-d")
    return "%s - %s" % (begins, ends)
def clean(self):
    """
    Clean up the raw data files from the state so they are
    ready to get loaded in the database.

    Every file in ``self.tsv_dir`` is read into memory, stripped of null
    bytes and ASCII 26 characters, and rewritten as a fully quoted CSV in
    ``self.csv_dir``. Fields listed in ``date_field_dict`` are rewritten
    with ``dateformat(dateparse(value), 'Y-m-d')``; values that fail are
    blanked and reported on stdout.

    This is Python 2 code (print statements, ``.next()`` on iterators).
    """
    print "Cleaning data files"
    csv.field_size_limit(1000000000)  # Up the CSV data limit
    # Maps a file name (without the .TSV suffix) to its date columns
    date_field_dict = {
        'CVR_SO': [
            'ACCT_OPENDT',
            'QUALFY_DT',
        ],
        'CVR_CAMPAIGN_DISCLOSURE_CD': [
            'ELECT_DATE',
            'FROM_DATE',
            'RPT_DATE',
            'RPTFROMDT',
            'RPTTHRUDT',
            'THRU_DATE'
        ],
        'CVR_LOBBY_DISCLOSURE_CD': [
            'CUM_BEG_DT',
            'FROM_DATE',
            'RPT_DATE',
            'SIG_DATE',
            'THRU_DATE'
        ],
        'CVR_REGISTRATION_CD': [
            'COMPLET_DT',
            'EFF_DATE',
            'QUAL_DATE',
            'RPT_DATE',
            'SIG_DATE'
        ],
        'EXPN_CD': [
            'EXPN_DATE'
        ],
        'FILERNAME_CD': [
            'EFFECT_DT'
        ],
        'FILER_FILINGS_CD': [
            'FILING_DATE',
            'RPT_START',
            'RPT_END',
            'RPT_DATE'
        ],
        'FILER_INTERESTS_CD': [
            'EFFECT_DATE'
        ],
        'FILER_LINKS_CD': [
            'EFFECT_DT',
            'TERMINATION_DT'
        ],
        'FILER_TO_FILER_TYPE_CD': [
            'EFFECT_DT',
            'NYQ_DT'
        ],
        'FILER_XREF_CD': [
            'EFFECT_DT'
        ],
        'FILING_PERIOD_CD': [
            'START_DATE',
            'END_DATE',
            'DEADLINE'
        ],
        'LATT_CD': [
            'CUMBEG_DT',
            'PMT_DATE',
        ],
        'LCCM_CD': [
            'CTRIB_DATE',
        ],
        'LEMP_CD': [
            'EFF_DATE'
        ],
        'LEXP_CD': [
            'EXPN_DATE'
        ],
        'LOAN_CD': [
            'LOAN_DATE1',
            'LOAN_DATE2'
        ],
        'LOBBY_AMENDMENTS_CD': [
            'ADD_L_EFF',
            'ADD_LE_EFF',
            'ADD_LF_EFF',
            'DEL_LF_EFF',
            'OTHER_EFF'
        ],
        'LOTH_CD': [
            'PMT_DATE',
        ],
        'RCPT_CD': [
            'DATE_THRU',
            'RCPT_DATE'
        ],
        'SMRY_CD': [
            'ELEC_DT'
        ],
        'S496_CD': [
            'EXP_DATE',
            'DATE_THRU',
        ],
        'S497_CD': [
            'ELEC_DATE',
            'CTRIB_DATE',
            'DATE_THRU',
        ],
        'S498_CD': [
            'DATE_RCVD',
        ],
    }
    # Loop through all the files in the source directory
    for name in os.listdir(self.tsv_dir):
        print "- %s" % name
        # Pull the data into memory
        # NOTE(review): this file handle is never explicitly closed.
        tsv_path = os.path.join(self.tsv_dir, name)
        tsv_data = open(tsv_path, 'rb').read()
        # Nuke any null bytes
        null_bytes = tsv_data.count('\x00')
        if null_bytes:
            tsv_data = tsv_data.replace('\x00', ' ')
        # Nuke ASCII 26 char, the "substitute character" or chr(26) in python
        sub_char = tsv_data.count('\x1a')
        if sub_char:
            tsv_data = tsv_data.replace('\x1a', '')
        # Convert the file to a CSV line by line.
        csv_path = os.path.join(
            self.csv_dir,
            name.lower().replace("tsv", "csv")
        )
        # NOTE(review): the output file is opened before the empty-data
        # check, so an empty TSV still creates a CSV and the handle is not
        # closed on the `continue` path.
        csv_file = open(csv_path, 'wb')
        csv_writer = CSVKitWriter(csv_file, quoting=csv.QUOTE_ALL)
        if tsv_data == '':
            print 'no data in %s' % name
            continue
        else:
            tsv_reader = StringIO(tsv_data)
            # The first line holds the column names
            headers = tsv_reader.next()
            headers = headers.decode("ascii", "replace").encode('utf-8')
            headers_csv = CSVKitReader(StringIO(headers), delimiter='\t')
            headers_list = headers_csv.next()
            csv_writer.writerow(headers_list)
            line_number = 1
            for tsv_line in tsv_reader:
                # Goofing around with the encoding while we're in there.
                tsv_line = tsv_line.decode("ascii", "replace").encode('utf-8')
                # choking on fields with bad quoting again
                # eg. '"HEIGHT AIN\'T RIGHT DOWNTOWN\' (DOWNTOWN HEIGHTS LIMIT INITIATIVE)',
                # quotes aren't closed
                try:
                    csv_line = CSVKitReader(StringIO(tsv_line), delimiter='\t')
                    csv_field_list = csv_line.next()
                except:
                    # Any parse failure: so strip all quotes for now
                    # and parse the line again
                    tsv_clean_line = ''.join(c for c in tsv_line if c not in ('"', "'"))
                    csv_line = CSVKitReader(StringIO(tsv_clean_line), delimiter='\t')
                    csv_field_list = csv_line.next()
                # Dates are only normalized when the row has as many
                # fields as the header
                if len(csv_field_list) == len(headers_list):
                    if name.replace('.TSV', '') in date_field_dict:
                        date_field_list = date_field_dict[name.replace('.TSV', '')]
                        for f in date_field_list:
                            if csv_field_list[headers_list.index(f)] != '':
                                try:
                                    csv_field_list[headers_list.index(f)] = dateformat(dateparse(csv_field_list[headers_list.index(f)]), 'Y-m-d')
                                except:
                                    # Unparseable dates are reported and blanked
                                    print '+++++++++++ INVALID DATE: %s\t%s\t%s' % (name, f, csv_field_list[headers_list.index(f)])
                                    csv_field_list[headers_list.index(f)] = ''
                else:
                    print '+++++ %s bad parse of line %s headers=%s & this line=%s' % (name, line_number, len(headers_list), len(csv_field_list))
                # The row is written even when its field count is off
                csv_writer.writerow(csv_field_list)
                line_number += 1
            # Shut it down
            tsv_reader.close()
            csv_file.close()
def show_trak_date(value):
    """
    Format ``value`` with the "Y-m-d" format string.

    Returns the placeholder "0000-00-00" when ``value`` is falsy.
    """
    # A plain conditional: the former `while` never ran more than once.
    if value:
        return dateformat(value, "Y-m-d")
    return "0000-00-00"
def show_click_datetime(value):
    """
    Format ``value`` with the "Y-m-d H:i:s" format string.

    Returns the placeholder "-" when ``value`` is falsy.
    """
    # A plain conditional: the former `while` never ran more than once.
    if value:
        return dateformat(value, "Y-m-d H:i:s")
    return "-"
def show_click_date(value):
    """
    Format ``value`` with the "Y-m-d" format string.

    Returns the placeholder "-" when ``value`` is falsy.
    """
    # A plain conditional: the former `while` never ran more than once.
    if value:
        return dateformat(value, "Y-m-d")
    return "-"
def show_trak_datetime(value):
    """
    Format ``value`` with the "Y-m-d H:i:s" format string.

    Returns the placeholder "0000-00-00 00:00:00" when ``value`` is falsy.
    """
    # A plain conditional: the former `while` never ran more than once.
    if value:
        return dateformat(value, "Y-m-d H:i:s")
    return "0000-00-00 00:00:00"
def handle_chunk(self, item_set, chunk_num):
    """
    Prepare the items of one chunk and write them to the chunk's export
    file.

    Items that are falsy, have no content type, have neither a title nor
    a pubdate, or whose ``get_absolute_url()`` raises are skipped. Each
    kept item gets ``comments``, ``disqus_id`` and ``gmt_timestamp``
    attributes and is recorded in ``self.state['processed_items']``.
    """
    stamp_format = "Y-m-d H:i:s"
    exportable = []

    for item in item_set:
        if not item:
            continue
        if not ContentType.objects.get_for_model(item):
            continue

        # normalize property names across models that may have different
        # names for same thing (i.e. "title" vs "headline"). see utils.py.
        item = normalize_all(item)

        if item.__class__.__name__.lower() == "photo":
            item.title = os.path.basename(smart_str(item.photo.name))

        if not getattr(item, 'title', None) and not getattr(item, 'pubdate', None):
            continue

        try:
            item.get_absolute_url()
        except KeyboardInterrupt:
            raise
        except:
            continue

        # add `comments` property to item
        item.comments = self._get_comments_for_item(item)
        for comment in item.comments:
            comment.gmt_timestamp = dateformat(
                dt_to_utc(comment.submit_date), stamp_format)

        # use "app.model(pk)" as disqus_id
        ctype = ContentType.objects.get_for_model(item)
        item.disqus_id = "%s.%s(%s)" % (ctype.app_label, ctype.name, item.pk)

        # turn local timestamp into a UTC timestamp, since that's what
        # disqus wants on import
        item.gmt_timestamp = dateformat(dt_to_utc(item.pubdate), stamp_format)

        if self.verbosity > 1:
            print("%s(%s) -> %s" % (ctype.name, item.pk, item.title))

        exportable.append(item)
        self.state['processed_items'].add((ctype.pk, item.pk))

    rendered = TEMPL.render(site=self.current_site, items=exportable)
    with open(EXPORT_FILENAME_FMT % (chunk_num), 'wb') as f:
        f.write(smart_str(rendered))

    if self.verbosity > 0:
        filename = EXPORT_FILENAME_FMT % chunk_num
        print("%s" % filename)
        print("=" * 60)
def item_title(self, item):
    """
    Title for a feed item, built from the localized ``item.start``.
    """
    local_start = timezone.localtime(item.start)
    taken_at = dateformat(local_start, 'l N j, Y, P e')
    return u'Screenshots taken at %s' % taken_at
def get_url(self, obj):
    """
    Return the "/date/<Y>/<m>/<d>/" path for the date ``obj``.
    """
    year = dateformat(obj, "Y")
    month = dateformat(obj, "m")
    day = dateformat(obj, "d")
    return '/date/%s/%s/%s/' % (year, month, day)