def pedalboard(self):
    bundlepath = self.get_argument('bundlepath')

    with open(DEFAULT_ICON_TEMPLATE, 'r') as fh:
        default_icon_template = squeeze(fh.read().replace("'", "\\'"))

    with open(DEFAULT_SETTINGS_TEMPLATE, 'r') as fh:
        default_settings_template = squeeze(fh.read().replace("'", "\\'"))

    try:
        pedalboard = get_pedalboard_info(bundlepath)
    except Exception:
        print("ERROR in webserver.py: get_pedalboard_info failed")
        pedalboard = {
            'height': 0,
            'width': 0,
            'title': "",
            'connections': [],
            'plugins': [],
            'hardware': {},
        }

    context = {
        'default_icon_template': default_icon_template,
        'default_settings_template': default_settings_template,
        'pedalboard': b64encode(json.dumps(pedalboard).encode("utf-8"))
    }

    return context
def post(self):
    user = self.get_current_user()

    if user.username:
        self.redirect('/')
        return

    username = squeeze(self.get_argument('username', ''))
    fullname = squeeze(self.get_argument('fullname', ''))

    errors = []

    if not username or not fullname:
        errors.append('Username and full name are mandatory')
    else:
        try:
            user.username = username
            user.fullname = fullname
            user.save()
            self.redirect('/')
            return
        except InvalidUsernameError:
            errors.append('Username must contain only letters, numbers and '
                          'underscores (a-z, 0-9, _)')
        except InvalidFullnameError:
            errors.append('Full name must contain only letters, numbers, '
                          'underscores and spaces (a-z, 0-9, _)')
        except UsernameTakenError:
            errors.append('This username is already taken')
        except Exception:
            errors.append('Unexpected error')

    self.render("signup.html", user=user, errors=errors)
def get(self, path):
    if not path:
        path = 'index.html'

    section = path.split('.', 1)[0]

    if section == 'index':
        # Caching strategy.
        # 1. If we don't have a version parameter, redirect
        curVersion = self.get_version()
        try:
            version = url_escape(self.get_argument('v'))
        except web.MissingArgumentError:
            uri = self.request.uri
            uri += '&' if self.request.query else '?'
            uri += 'v=%s' % curVersion
            self.redirect(uri)
            return
        # 2. Make sure version is correct
        if IMAGE_VERSION is not None and version != curVersion:
            uri = self.request.uri.replace('v=%s' % version,
                                           'v=%s' % curVersion)
            self.redirect(uri)
            return

        lv2_cleanup()
        lv2_init()
    else:
        version = self.get_version()

    loader = template.Loader(HTML_DIR)

    with open(DEFAULT_ICON_TEMPLATE, 'r') as fd:
        default_icon_template = squeeze(fd.read().replace("'", "\\'"))

    with open(DEFAULT_SETTINGS_TEMPLATE, 'r') as fd:
        default_settings_template = squeeze(fd.read().replace("'", "\\'"))

    with open(WIZARD_DB, 'r') as fh:
        wizard_db = json.load(fh)

    context = {
        'default_device': DEFAULT_DEVICE,
        'default_icon_template': default_icon_template,
        'default_settings_template': default_settings_template,
        'version': version,
        'wizard_db': json.dumps(wizard_db),
        'device_mode': 'true' if DEVICE_MODE else 'false',
        'write_access': 'true' if LV2_DIR else 'false',
    }

    self.write(loader.load(path).generate(**context))
def tlinject(handler, s, key_is_pt=False, *, mock=False):
    try:
        base, alt_set = handler._tlinject_base
    except AttributeError:
        base = TLINJECT_EMPTY
        alt_set = TLINJECT_ALT_EMPTY
        logging.warning("TLInject users should have a _tlinject_base dict")

    secret = handler.settings["db_coordinator"].tlinject_database.secret
    if secret:
        my = hmac.new(secret, s.encode("utf8"), hashlib.sha224).digest()[:12]
        my = base64.urlsafe_b64encode(my).decode("ascii").rstrip("=")
    else:
        my = ""

    data = []
    if mock:
        data.append('data-mock="1"')
    if s in alt_set:
        data.append('data-overlay="1"')

    pretranslated = base.get(s, None)
    if pretranslated:
        if not key_is_pt:
            data.append(f'data-tlik="{xhtml_escape(s)}"')
        s = pretranslated

    all_data = " ".join(data)
    return squeeze(
        f"""<span class="tlinject" data-assr="{my}" {all_data}>{xhtml_escape(s)}</span>"""
    )
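# A minimal, self-contained sketch of the per-string signature that
# tlinject() above embeds in the data-assr attribute, assuming only that
# `secret` is a bytes key (the handler/settings plumbing is omitted):
import base64
import hashlib
import hmac

def assr_signature(secret, s):
    # first 12 bytes of HMAC-SHA224 over the UTF-8 string, base64url-encoded
    # with the padding stripped, exactly as in tlinject() above
    digest = hmac.new(secret, s.encode("utf8"), hashlib.sha224).digest()[:12]
    return base64.urlsafe_b64encode(digest).decode("ascii").rstrip("=")

# e.g. assr_signature(b"example-secret", "hello") -> a 16-character token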
def SendEmailToUser(template, user):
    assert user.email is not None, user

    unsubscribe_cookie = User.CreateUnsubscribeCookie(user.user_id,
                                                      AccountSettings.MARKETING)
    unsubscribe_url = "https://%s/unsubscribe?%s" % (
        options.domain, urlencode(dict(cookie=unsubscribe_cookie)))

    # Create arguments for the email template.
    fmt_args = {"first_name": user.given_name,
                "unsubscribe_url": unsubscribe_url}

    # Create arguments for the email.
    args = {
        "from": EmailManager.Instance().GetInfoAddress(),
        "fromname": "Viewfinder",
        "to": user.email,
        "subject": options.email_subject,
    }
    util.SetIfNotNone(args, "toname", user.name)

    args["html"] = template.generate(is_html=True, **fmt_args)
    args["text"] = template.generate(is_html=False, **fmt_args)

    print("Sending marketing email to %s (%s) (#%d)"
          % (user.email, user.name, user.user_id))

    if options.test_mode:
        global _is_first_email
        if _is_first_email:
            print(args["html"])
            _is_first_email = False
    else:
        # Remove extra whitespace in the HTML (seems to help it avoid Gmail
        # spam filter).
        args["html"] = escape.squeeze(args["html"])
        yield gen.Task(EmailManager.Instance().SendEmail,
                       description="marketing email", **args)
def get(self):
    self.set_header("Content-Type", "text/plain; charset=UTF-8")

    basedir = os.path.join(HTML_DIR, 'include')

    for template in os.listdir(basedir):
        if not re.match(r'^[a-z_]+\.html$', template):
            continue
        with open(os.path.join(basedir, template), 'r') as fh:
            contents = fh.read()
        self.write("TEMPLATES['%s'] = '%s';\n\n"
                   % (template[:-5],
                      squeeze(contents.replace("'", "\\'"))))

    self.finish()
def index(self):
    context = {}

    user_id = safe_json_load(USER_ID_JSON_FILE, dict)
    prefs = safe_json_load(PREFERENCES_JSON_FILE, dict)

    with open(DEFAULT_ICON_TEMPLATE, 'r') as fh:
        default_icon_template = squeeze(fh.read().replace("'", "\\'"))

    with open(DEFAULT_SETTINGS_TEMPLATE, 'r') as fh:
        default_settings_template = squeeze(fh.read().replace("'", "\\'"))

    pbname = xhtml_escape(SESSION.host.pedalboard_name)
    prname = SESSION.host.pedalpreset_name()

    fullpbname = pbname or "Untitled"
    if prname:
        fullpbname += " - " + prname

    context = {
        'default_icon_template': default_icon_template,
        'default_settings_template': default_settings_template,
        'default_pedalboard': DEFAULT_PEDALBOARD,
        'cloud_url': CLOUD_HTTP_ADDRESS,
        'pedalboards_url': PEDALBOARDS_HTTP_ADDRESS,
        'hardware_profile': b64encode(
            json.dumps(SESSION.get_hardware_actuators()).encode("utf-8")),
        'version': self.get_argument('v'),
        'lv2_plugin_dir': LV2_PLUGIN_DIR,
        'bundlepath': SESSION.host.pedalboard_path,
        'title': pbname,
        'size': json.dumps(SESSION.host.pedalboard_size),
        'fulltitle': fullpbname,
        'titleblend': '' if SESSION.host.pedalboard_name else 'blend',
        'using_app': 'true' if APP else 'false',
        'using_mod': 'true' if DEVICE_KEY else 'false',
        'user_name': xhtml_escape(user_id.get("name", "")),
        'user_email': xhtml_escape(user_id.get("email", "")),
        'favorites': json.dumps(gState.favorites),
        'preferences': json.dumps(prefs),
    }

    return context
def _GetAuthEmail(cls, client, action, use_short_token, user_name, identity,
                  short_url):
    """Returns a dict of parameters that will be passed to
    EmailManager.SendEmail in order to email an access token to a user who is
    verifying his/her account.
    """
    action_info = VerifyIdBaseHandler.ACTION_MAP[action]
    identity_type, identity_value = Identity.SplitKey(identity.key)

    # Create arguments for the email.
    args = {'from': EmailManager.Instance().GetInfoAddress(),
            'fromname': 'Viewfinder',
            'to': identity_value}
    util.SetIfNotNone(args, 'toname', user_name)

    # Create arguments for the email template.
    fmt_args = {'user_name': user_name or identity_value,
                'user_email': identity_value,
                'url': 'https://%s/%s%s' % (ServerEnvironment.GetHost(),
                                            short_url.group_id,
                                            short_url.random_key),
                'title': action_info.title,
                'use_short_token': use_short_token,
                'access_token': identity.access_token}

    # The email html format is designed to meet these requirements:
    #   1. It must be viewable on even the most primitive email html viewer.
    #      Avoid fancy CSS.
    #   2. It cannot contain any images. Some email systems (like Gmail) do
    #      not show images by default.
    #   3. It must be short and look good on an iPhone 4S screen. The action
    #      button should be visible without any scrolling necessary.
    resources_mgr = ResourcesManager.Instance()

    if use_short_token:
        args['subject'] = 'Viewfinder Code: %s' % identity.access_token
    else:
        args['subject'] = action_info.title

    args['html'] = resources_mgr.GenerateTemplate(action_info.email_template,
                                                  is_html=True, **fmt_args)
    args['text'] = resources_mgr.GenerateTemplate(action_info.email_template,
                                                  is_html=False, **fmt_args)

    # Remove extra whitespace in the HTML (seems to help it avoid Gmail spam
    # filter).
    args['html'] = escape.squeeze(args['html'])

    return args
def get(self):
    artists = [squeeze(artist).replace(' ', '+')
               for artist in self.get_arguments('artist') if artist]

    get_profile_responses = yield [get_artist_profile(artist)
                                   for artist in artists]

    decoded_response_bodies = [json_decode(response.body)
                               for response in get_profile_responses]

    artist_profiles = format_artist_profiles(decoded_response_bodies)

    self.render('mainstream.html', snark=how_mainstream(artist_profiles))
def get(self, path):
    if not path:
        path = 'index.html'

    loader = template.Loader(HTML_DIR)

    with open(DEFAULT_ICON_TEMPLATE, 'r') as fd:
        default_icon_template = squeeze(fd.read().replace("'", "\\'"))

    with open(DEFAULT_SETTINGS_TEMPLATE, 'r') as fd:
        default_settings_template = squeeze(fd.read().replace("'", "\\'"))

    with open(WIZARD_DB, 'r') as fh:
        wizard_db = json.load(fh)

    context = {
        'default_device': DEFAULT_DEVICE,
        'default_icon_template': default_icon_template,
        'default_settings_template': default_settings_template,
        'wizard_db': json.dumps(wizard_db),
        'write_access': 1 if LV2_DIR else 0,
    }

    self.write(loader.load(path).generate(**context))
def tlinject(handler, s, key_is_pt=False, *, mock=False):
    try:
        base = handler._tlinject_base
    except AttributeError:
        base = {}
        logging.warning("TLInject users should have a _tlinject_base dict")

    secret = handler.settings.get("tlinject_context").secret
    if secret:
        my = hmac.new(secret, s.encode("utf8"), hashlib.sha224).digest()[:12]
        my = base64.urlsafe_b64encode(my).decode("ascii").rstrip("=")
    else:
        my = ""

    if mock:
        extra = 'data-mock="1"'
    else:
        extra = ""

    pretranslated = base.get(s, None)
    if pretranslated:
        if key_is_pt:
            return squeeze(
                f"""<span class="tlinject" data-assr="{my}" {extra}>
                {xhtml_escape(pretranslated)}
                </span>"""
            )
        return squeeze(
            f"""<span class="tlinject" data-assr="{my}" data-tlik="{xhtml_escape(s)}" {extra}>
            {xhtml_escape(pretranslated)}
            </span>"""
        )
    else:
        return squeeze(
            f"""<span class="tlinject" data-assr="{my}" {extra}>{xhtml_escape(s)}</span>"""
        )
def post(self):
    email = squeeze(self.get_argument('email', ''))
    password = squeeze(self.get_argument('password', ''))
    action = self.get_argument('action')

    response = {'status': 'failed'}

    if not email or not password:
        response['error'] = 'Email and password are mandatory'
        self.write(json_encode(response))
        return

    if action == 'Login':
        id = get_identity(email, password)
        if not id:
            response['error'] = 'Wrong Username/Email and password combination'
            self.write(json_encode(response))
            return
        user = User.objects.find(id=id)
        if user:
            token = generate_token()
            user.authenticate(token)
            self.set_secure_cookie("auth", token)
            self.redirect('/')
    elif action == 'Signup':
        already_exists = email_exists(email)
        if not already_exists:
            fullname = email.split('@')[0].replace('.', ' ').replace('_', ' ')
            user = User.objects.create('', fullname)
            store_credentials(user.id, email, password)
            token = generate_token()
            user.authenticate(token)
            self.set_secure_cookie("auth", token)
            self.redirect('/signup')
def get_local_authors_from_db(database=None):
    '''
    This just pulls out the authors from the local_authors table. Normalizes
    the form so they can be matched against the paper authors.

    '''

    # open the database if needed and get a cursor
    if not database:
        database, cursor = opendb()
        closedb = True
    else:
        cursor = database.cursor()
        closedb = False

    # get all local authors first
    query = 'select author, email from local_authors'
    cursor.execute(query)
    rows = cursor.fetchall()

    if rows and len(rows) > 0:

        local_authors, author_emails = list(zip(*rows))

        local_authors = [x.lower() for x in local_authors]
        local_authors = [x.replace('.', ' ') for x in local_authors]
        local_authors = [squeeze(x) for x in local_authors]

        # this contains firstinitial-lastname pairs
        local_author_fnames = [x.split() for x in local_authors]
        local_author_fnames = [''.join([x[0][0], x[-1]])
                               for x in local_author_fnames]

        local_authors = [x.replace(' ', '') for x in local_authors]

    else:
        local_authors, local_author_fnames, author_emails = [], [], []

    # at the end, close the cursor and DB connection
    if closedb:
        cursor.close()
        database.close()

    return local_authors, local_author_fnames, author_emails
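# A worked example of the name normalization performed in
# get_local_authors_from_db() above, using a hypothetical author name:
from tornado.escape import squeeze

name = 'J. Q.  Smith'
norm = squeeze(name.lower().replace('.', ' '))  # -> 'j q smith'
# firstinitial-lastname key used for the first-pass fuzzy match
fname_key = ''.join([norm.split()[0][0], norm.split()[-1]])  # -> 'jsmith'
# fully compacted form used for the full-name match
compact = norm.replace(' ', '')  # -> 'jqsmith'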
def tag_local_authors(arxiv_date,
                      database=None,
                      firstname_match_threshold=99,
                      fullname_match_threshold=99,
                      update_db=False,
                      verbose=False):
    '''
    This finds all local authors for all papers on the date arxiv_date and
    tags the rows for them in the DB.

    '''

    # open the database if needed and get a cursor
    if not database:
        database, cursor = opendb()
        closedb = True
    else:
        cursor = database.cursor()
        closedb = False

    # get all local authors first and normalize their form
    local_authors, local_author_fnames, local_emails = (
        get_local_authors_from_db(database=database)
    )

    if len(local_authors) > 0:

        # get all the authors for this date
        query = 'select arxiv_id, authors from arxiv where utcdate = date(?)'
        query_params = (arxiv_date,)
        cursor.execute(query, query_params)
        rows = cursor.fetchall()

        if rows and len(rows) > 0:

            local_author_articles = []

            for row in rows:

                paper_authors = row[1]

                # get rid of the affiliations for matching to local authors
                paper_authors = strip_affils(paper_authors)

                # we'll save this initial cleaned version back to the database
                # for local matched papers so all the author indices line up
                # correctly
                cleaned_paper_authors = paper_authors[::]

                if verbose:
                    print('%s authors: %s' %
                          (row[0], repr(cleaned_paper_authors)))

                # normalize these names so we can compare them more robustly
                # to the local authors
                paper_authors = [x.lower().strip() for x in paper_authors]
                paper_authors = [x.strip() for x in paper_authors
                                 if len(x) > 1]
                paper_authors = [x.replace('.', ' ') for x in paper_authors]
                paper_authors = [squeeze(x) for x in paper_authors]

                paper_author_fnames = [x.split() for x in paper_authors]
                paper_author_fnames = [''.join([x[0][0], x[-1]])
                                       for x in paper_author_fnames]

                paper_authors = [x.replace(' ', '') for x in paper_authors]

                if verbose:
                    print("%s normalized authors: %s" %
                          (row[0], repr(paper_authors)))

                local_matched_author_inds = []
                local_matched_author_affils = []

                # match to the flastname first, then if that works, try
                # another match with fullname. if both work, then we accept
                # this as a local author match
                for paper_author, paper_fname, paper_author_ind in zip(
                        paper_authors,
                        paper_author_fnames,
                        range(len(paper_authors))):

                    matched_author_fname = process.extractOne(
                        paper_fname,
                        local_author_fnames,
                        score_cutoff=firstname_match_threshold)
                    matched_author_full = process.extractOne(
                        paper_author,
                        local_authors,
                        score_cutoff=fullname_match_threshold)

                    if matched_author_fname and matched_author_full:

                        print('%s: %s, matched paper author: %s '
                              'to local author: %s. '
                              'first name score: %s, full name score: %s' % (
                                  row[0],
                                  paper_authors,
                                  paper_author,
                                  matched_author_full[0],
                                  matched_author_fname[1],
                                  matched_author_full[1],
                              ))

                        # update the paper author index column so we can
                        # highlight them in the frontend
                        local_matched_author_inds.append(paper_author_ind)

                        # also update the affilation tag for this author
                        local_authind = local_authors.index(
                            matched_author_full[0])

                        # get the corresponding email
                        local_matched_email = local_emails[local_authind]

                        # split to get the affil tag
                        local_matched_affil = (
                            local_matched_email.split('@')[-1])

                        if local_matched_affil in AFFIL_DICT:
                            local_matched_author_affils.append(
                                AFFIL_DICT[local_matched_affil])

                # now that we have all the special affils, compress them into
                # only the unique ones
                local_matched_author_affils = list(
                    set(local_matched_author_affils))

                #
                # done with all authors for this paper
                #

                # now update the info for this paper
                if len(local_matched_author_inds) > 0 and update_db:

                    # arxivid of this article that has local authors
                    local_author_articles.append((row[0]))

                    # these encode the positions of the local authors in the
                    # author list
                    local_author_indices = (','.join(
                        ['%s' % x for x in local_matched_author_inds]))
                    local_author_special_affils = ','.join(
                        local_matched_author_affils)

                    cursor.execute(
                        'update arxiv set '
                        'authors = ?, '
                        'local_authors = ?, '
                        'local_author_indices = ?, '
                        'local_author_specaffils = ? '
                        'where '
                        'arxiv_id = ?',
                        (','.join(cleaned_paper_authors),
                         True,
                         local_author_indices,
                         local_author_special_affils,
                         row[0]))

            #
            # done with all papers for the day
            #

            # commit the transaction at the end
            if update_db:
                database.commit()

            return local_author_articles

        else:
            print('no articles for this date')
            return False

    else:
        print('no local authors defined')
        return False

    # at the end, close the cursor and DB connection
    if closedb:
        cursor.close()
        database.close()
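# A hypothetical driver for tag_local_authors() above; the date string is
# illustrative and assumes the arxiv table stores utcdate values in
# YYYY-MM-DD form:
matched_articles = tag_local_authors(
    '2018-06-15',
    firstname_match_threshold=99,
    fullname_match_threshold=99,
    update_db=True,
    verbose=True
)
# matched_articles -> list of arxiv_ids with local authors, or False if
# there were no articles for the date or no local authors defined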
def get_periodicfeatures(
        pfpickle,
        lcbasedir,
        outdir,
        fourierorder=5,
        # these are depth, duration, ingress duration
        transitparams=(-0.01, 0.1, 0.1),
        # these are depth, duration, depth ratio, secphase
        ebparams=(-0.2, 0.3, 0.7, 0.5),
        pdiff_threshold=1.0e-4,
        sidereal_threshold=1.0e-4,
        sampling_peak_multiplier=5.0,
        sampling_startp=None,
        sampling_endp=None,
        starfeatures=None,
        timecols=None,
        magcols=None,
        errcols=None,
        lcformat='hat-sql',
        lcformatdir=None,
        sigclip=10.0,
        verbose=True,
        raiseonfail=False
):
    '''This gets all periodic features for the object.

    Parameters
    ----------

    pfpickle : str
        The period-finding result pickle containing period-finder results to
        use for the calculation of LC fit, periodogram, and phased LC
        features.

    lcbasedir : str
        The base directory where the light curve for the current object is
        located.

    outdir : str
        The output directory where the results will be written.

    fourierorder : int
        The Fourier order to use to generate sinusoidal function and fit that
        to the phased light curve.

    transitparams : list of floats
        The transit depth, duration, and ingress duration to use to generate
        a trapezoid planet transit model fit to the phased light curve. The
        period used is the one provided in `period`, while the epoch is
        automatically obtained from a spline fit to the phased light curve.

    ebparams : list of floats
        The primary eclipse depth, eclipse duration, the primary-secondary
        depth ratio, and the phase of the secondary eclipse to use to
        generate an eclipsing binary model fit to the phased light curve. The
        period used is the one provided in `period`, while the epoch is
        automatically obtained from a spline fit to the phased light curve.

    pdiff_threshold : float
        This is the max difference between periods to consider them the same.

    sidereal_threshold : float
        This is the max difference between any of the 'best' periods and the
        sidereal day periods to consider them the same.

    sampling_peak_multiplier : float
        This is the minimum multiplicative factor of a 'best' period's
        normalized periodogram peak over the sampling periodogram peak at the
        same period required to accept the 'best' period as possibly real.

    sampling_startp, sampling_endp : float
        If the `pgramlist` doesn't have a time-sampling Lomb-Scargle
        periodogram, it will be obtained automatically. Use these kwargs to
        control the minimum and maximum period interval to be searched when
        generating this periodogram.

    starfeatures : str or None
        If not None, this should be the filename of the
        `starfeatures-<objectid>.pkl` created by
        :py:func:`astrobase.lcproc.lcsfeatures.get_starfeatures` for this
        object. This is used to get the neighbor's light curve and phase it
        with this object's period to see if this object is blended.

    timecols : list of str or None
        The timecol keys to use from the lcdict in calculating the features.

    magcols : list of str or None
        The magcol keys to use from the lcdict in calculating the features.

    errcols : list of str or None
        The errcol keys to use from the lcdict in calculating the features.

    lcformat : str
        This is the `formatkey` associated with your light curve format,
        which you previously passed in to the `lcproc.register_lcformat`
        function. This will be used to look up how to find and read the light
        curves specified in `basedir` or `use_list_of_filenames`.

    lcformatdir : str or None
        If this is provided, gives the path to a directory when you've stored
        your lcformat description JSONs, other than the usual directories
        lcproc knows to search for them in. Use this along with `lcformat` to
        specify an LC format JSON file that's not currently registered with
        lcproc.

    sigclip : float or int or sequence of two floats/ints or None
        If a single float or int, a symmetric sigma-clip will be performed
        using the number provided as the sigma-multiplier to cut out from the
        input time-series.

        If a list of two ints/floats is provided, the function will perform
        an 'asymmetric' sigma-clip. The first element in this list is the
        sigma value to use for fainter flux/mag values; the second element in
        this list is the sigma value to use for brighter flux/mag values. For
        example, `sigclip=[10., 3.]`, will sigclip out greater than 10-sigma
        dimmings and greater than 3-sigma brightenings. Here the meaning of
        "dimming" and "brightening" is set by *physics* (not the magnitude
        system), which is why the `magsarefluxes` kwarg must be correctly
        set.

        If `sigclip` is None, no sigma-clipping will be performed, and the
        time-series (with non-finite elems removed) will be passed through to
        the output.

    verbose : bool
        If True, will indicate progress while working.

    raiseonfail : bool
        If True, will raise an Exception if something goes wrong.

    Returns
    -------

    str
        Returns a filename for the output pickle containing all of the
        periodic features for the input object's LC.

    '''

    try:
        formatinfo = get_lcformat(lcformat,
                                  use_lcformat_dir=lcformatdir)
        if formatinfo:
            (fileglob, readerfunc,
             dtimecols, dmagcols, derrcols,
             magsarefluxes, normfunc) = formatinfo
        else:
            LOGERROR("can't figure out the light curve format")
            return None
    except Exception:
        LOGEXCEPTION("can't figure out the light curve format")
        return None

    # open the pfpickle
    if pfpickle.endswith('.gz'):
        infd = gzip.open(pfpickle)
    else:
        infd = open(pfpickle, 'rb')
    pf = pickle.load(infd)
    infd.close()

    lcfile = os.path.join(lcbasedir, pf['lcfbasename'])
    objectid = pf['objectid']

    if 'kwargs' in pf:
        kwargs = pf['kwargs']
    else:
        kwargs = None

    # override the default timecols, magcols, and errcols
    # using the ones provided to the periodfinder
    # if those don't exist, use the defaults from the lcformat def
    if kwargs and 'timecols' in kwargs and timecols is None:
        timecols = kwargs['timecols']
    elif not kwargs and not timecols:
        timecols = dtimecols

    if kwargs and 'magcols' in kwargs and magcols is None:
        magcols = kwargs['magcols']
    elif not kwargs and not magcols:
        magcols = dmagcols

    if kwargs and 'errcols' in kwargs and errcols is None:
        errcols = kwargs['errcols']
    elif not kwargs and not errcols:
        errcols = derrcols

    # check if the light curve file exists
    if not os.path.exists(lcfile):
        LOGERROR("can't find LC %s for object %s" % (lcfile, objectid))
        return None

    # check if we have neighbors we can get the LCs for
    if starfeatures is not None and os.path.exists(starfeatures):

        with open(starfeatures, 'rb') as infd:
            starfeat = pickle.load(infd)

        if starfeat['closestnbrlcfname'].size > 0:

            nbr_full_lcf = starfeat['closestnbrlcfname'][0]

            # check for this LC in the lcbasedir
            if os.path.exists(os.path.join(lcbasedir,
                                           os.path.basename(nbr_full_lcf))):
                nbrlcf = os.path.join(lcbasedir,
                                      os.path.basename(nbr_full_lcf))
            # if it's not there, check for this file at the full LC location
            elif os.path.exists(nbr_full_lcf):
                nbrlcf = nbr_full_lcf
            # otherwise, we can't find it, so complain
            else:
                LOGWARNING("can't find neighbor light curve file: %s in "
                           "its original directory: %s, or in this object's "
                           "lcbasedir: %s, skipping neighbor processing..." %
                           (os.path.basename(nbr_full_lcf),
                            os.path.dirname(nbr_full_lcf),
                            lcbasedir))
                nbrlcf = None

        else:
            nbrlcf = None

    else:
        nbrlcf = None

    # now, start processing for periodic feature extraction
    try:

        # get the object LC into a dict
        lcdict = readerfunc(lcfile)

        # this should handle lists/tuples being returned by readerfunc
        # we assume that the first element is the actual lcdict
        # FIXME: figure out how to not need this assumption
        if ((isinstance(lcdict, (list, tuple))) and
                (isinstance(lcdict[0], dict))):
            lcdict = lcdict[0]

        # get the nbr object LC into a dict if there is one
        if nbrlcf is not None:

            nbrlcdict = readerfunc(nbrlcf)

            # this should handle lists/tuples being returned by readerfunc
            # we assume that the first element is the actual lcdict
            # FIXME: figure out how to not need this assumption
            if ((isinstance(nbrlcdict, (list, tuple))) and
                    (isinstance(nbrlcdict[0], dict))):
                nbrlcdict = nbrlcdict[0]

        # this will be the output file
        outfile = os.path.join(
            outdir,
            'periodicfeatures-%s.pkl' % squeeze(objectid).replace(' ', '-')
        )

        # normalize using the special function if specified
        if normfunc is not None:

            lcdict = normfunc(lcdict)

            if nbrlcf:
                nbrlcdict = normfunc(nbrlcdict)

        resultdict = {}

        for tcol, mcol, ecol in zip(timecols, magcols, errcols):

            # dereference the columns and get them from the lcdict
            if '.' in tcol:
                tcolget = tcol.split('.')
            else:
                tcolget = [tcol]
            times = _dict_get(lcdict, tcolget)

            if nbrlcf:
                nbrtimes = _dict_get(nbrlcdict, tcolget)
            else:
                nbrtimes = None

            if '.' in mcol:
                mcolget = mcol.split('.')
            else:
                mcolget = [mcol]
            mags = _dict_get(lcdict, mcolget)

            if nbrlcf:
                nbrmags = _dict_get(nbrlcdict, mcolget)
            else:
                nbrmags = None

            if '.' in ecol:
                ecolget = ecol.split('.')
            else:
                ecolget = [ecol]
            errs = _dict_get(lcdict, ecolget)

            if nbrlcf:
                nbrerrs = _dict_get(nbrlcdict, ecolget)
            else:
                nbrerrs = None

            #
            # filter out nans, etc. from the object and any neighbor LC
            #

            # get the finite values
            finind = (np.isfinite(times) &
                      np.isfinite(mags) &
                      np.isfinite(errs))
            ftimes, fmags, ferrs = times[finind], mags[finind], errs[finind]

            if nbrlcf:
                nfinind = (np.isfinite(nbrtimes) &
                           np.isfinite(nbrmags) &
                           np.isfinite(nbrerrs))
                nbrftimes, nbrfmags, nbrferrs = (nbrtimes[nfinind],
                                                 nbrmags[nfinind],
                                                 nbrerrs[nfinind])

            # get nonzero errors
            nzind = np.nonzero(ferrs)
            ftimes, fmags, ferrs = ftimes[nzind], fmags[nzind], ferrs[nzind]

            if nbrlcf:
                nnzind = np.nonzero(nbrferrs)
                nbrftimes, nbrfmags, nbrferrs = (nbrftimes[nnzind],
                                                 nbrfmags[nnzind],
                                                 nbrferrs[nnzind])

            # normalize here if not using special normalization
            if normfunc is None:

                ntimes, nmags = normalize_magseries(
                    ftimes, fmags,
                    magsarefluxes=magsarefluxes
                )
                times, mags, errs = ntimes, nmags, ferrs

                if nbrlcf:
                    nbrntimes, nbrnmags = normalize_magseries(
                        nbrftimes, nbrfmags,
                        magsarefluxes=magsarefluxes
                    )
                    nbrtimes, nbrmags, nbrerrs = (nbrntimes,
                                                  nbrnmags,
                                                  nbrferrs)
                else:
                    nbrtimes, nbrmags, nbrerrs = None, None, None

            else:
                times, mags, errs = ftimes, fmags, ferrs

            if times.size > 999:

                #
                # now we have times, mags, errs (and nbrtimes, nbrmags,
                # nbrerrs) available
                #

                available_pfmethods = []
                available_pgrams = []
                available_bestperiods = []

                for k in pf[mcol].keys():

                    if k in PFMETHODS:

                        available_pgrams.append(pf[mcol][k])

                        if k != 'win':
                            available_pfmethods.append(
                                pf[mcol][k]['method'])
                            available_bestperiods.append(
                                pf[mcol][k]['bestperiod'])

                #
                # process periodic features for this magcol
                #
                featkey = 'periodicfeatures-%s' % mcol
                resultdict[featkey] = {}

                # first, handle the periodogram features
                pgramfeat = periodicfeatures.periodogram_features(
                    available_pgrams, times, mags, errs,
                    sigclip=sigclip,
                    pdiff_threshold=pdiff_threshold,
                    sidereal_threshold=sidereal_threshold,
                    sampling_peak_multiplier=sampling_peak_multiplier,
                    sampling_startp=sampling_startp,
                    sampling_endp=sampling_endp,
                    verbose=verbose
                )
                resultdict[featkey].update(pgramfeat)

                resultdict[featkey]['pfmethods'] = available_pfmethods

                # then for each bestperiod, get phasedlc and lcfit features
                for _ind, pfm, bp in zip(range(len(available_bestperiods)),
                                         available_pfmethods,
                                         available_bestperiods):

                    resultdict[featkey][pfm] = (
                        periodicfeatures.lcfit_features(
                            times, mags, errs, bp,
                            fourierorder=fourierorder,
                            transitparams=transitparams,
                            ebparams=ebparams,
                            sigclip=sigclip,
                            magsarefluxes=magsarefluxes,
                            verbose=verbose
                        )
                    )

                    phasedlcfeat = periodicfeatures.phasedlc_features(
                        times, mags, errs, bp,
                        nbrtimes=nbrtimes,
                        nbrmags=nbrmags,
                        nbrerrs=nbrerrs
                    )

                    resultdict[featkey][pfm].update(phasedlcfeat)

            else:

                LOGERROR('not enough finite measurements in magcol: %s, for '
                         'pfpickle: %s, skipping this magcol'
                         % (mcol, pfpickle))
                featkey = 'periodicfeatures-%s' % mcol
                resultdict[featkey] = None

        #
        # end of per magcol processing
        #

        # write resultdict to pickle
        outfile = os.path.join(
            outdir,
            'periodicfeatures-%s.pkl' % squeeze(objectid).replace(' ', '-')
        )
        with open(outfile, 'wb') as outfd:
            pickle.dump(resultdict, outfd, pickle.HIGHEST_PROTOCOL)

        return outfile

    except Exception:

        LOGEXCEPTION('failed to run for pf: %s, lcfile: %s' %
                     (pfpickle, lcfile))
        if raiseonfail:
            raise
        else:
            return None
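# A hypothetical invocation of get_periodicfeatures() above, assuming a
# period-finder result pickle produced by runpf() and an already-registered
# 'hat-sql' LC format (all paths and the object ID are made up):
outpickle = get_periodicfeatures(
    '/data/pfresults/periodfinding-HAT-123-0001234.pkl',
    '/data/lightcurves',
    '/data/periodicfeatures',
    lcformat='hat-sql',
    sigclip=10.0,
    verbose=False
)
# outpickle -> '/data/periodicfeatures/periodicfeatures-HAT-123-0001234.pkl'
# on success, or None if the LC can't be found or processing fails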
def post(self):
    '''This handles the POST to /admin/email and updates the
    site-settings.json file on disk.

    '''

    if not self.current_user:
        self.redirect('/')
        return

    if ((not self.keycheck['status'] == 'ok') or
            (not self.xsrf_type == 'session')):

        self.set_status(403)
        retdict = {
            'status': 'failed',
            'result': None,
            'message': ("Sorry, you don't have access. "
                        "API keys are not allowed for this endpoint.")
        }
        self.write(retdict)
        raise tornado.web.Finish()

    # get the current user
    current_user = self.current_user

    # only allow in superuser roles
    if current_user and current_user['user_role'] == 'superuser':

        try:

            # get the form inputs
            loginval = xhtml_escape(
                self.get_argument('loginradio')).strip().lower()
            signupval = xhtml_escape(
                self.get_argument('signupradio')).strip().lower()
            emailsender = xhtml_escape(
                self.get_argument('emailsender')).strip()
            emailserver = xhtml_escape(
                self.get_argument('emailserver')).strip().lower()
            emailport = abs(
                int(xhtml_escape(self.get_argument('emailport')).strip()))
            emailuser = xhtml_escape(
                self.get_argument('emailuser')).strip()
            emailpass = self.get_argument('emailpass').strip()

            if loginval == 'login-allowed':
                loginval = True
            elif loginval == 'login-disallowed':
                loginval = False
            else:
                loginval = False

            if signupval == 'signup-allowed':
                signupval = True
            elif signupval == 'signup-disallowed':
                signupval = False
            else:
                signupval = False

            # make sure to check if the email settings are valid if signups
            # are enabled
            if signupval and (len(emailsender) == 0 or
                              len(emailserver) == 0 or
                              emailserver == 'smtp.emailserver.org' or
                              emailport == 0 or
                              len(emailuser) == 0 or
                              len(emailpass) == 0):

                LOGGER.error('invalid items in the '
                             'admin-email-update-form')
                self.set_status(400)
                retdict = {
                    'status': 'failed',
                    'result': None,
                    'message': ("Invalid input in the "
                                "email settings form. "
                                "All fields are required "
                                "if new user sign-ups are "
                                "to be enabled.")
                }
                self.write(retdict)
                raise tornado.web.Finish()

            # email addresses
            allowed_email_addrs = self.get_argument(
                'allowedemailaddr', None)

            if allowed_email_addrs is not None:
                allowed_email_addrs = squeeze(
                    xhtml_escape(allowed_email_addrs)).split(',')
                allowed_email_addrs = [
                    x.strip() for x in allowed_email_addrs
                ]
            else:
                allowed_email_addrs = []

            # to update site-info.json
            updatedict = {
                "logins_allowed": loginval,
                "signups_allowed": signupval,
                "allowed_user_emailaddr": allowed_email_addrs,
            }

            # to update email-settings file
            emailupdatedict = {
                "email_sender": emailsender,
                "email_server": emailserver,
                "email_port": emailport,
                "email_user": emailuser,
                "email_pass": emailpass,
            }

            # update the siteinfo dict
            self.siteinfo.update(updatedict)

            # update the site-info.json file on disk
            siteinfojson = os.path.join(self.basedir, 'site-info.json')

            with open(siteinfojson, 'r') as infd:
                siteinfo_disk = json.load(infd)

            # update site-info.json only with values of logins-allowed,
            # signups-allowed
            siteinfo_disk.update(updatedict)

            LOGGER.warning(
                'updating site-info.json from admin-email-update-form')

            with open(siteinfojson, 'w') as outfd:
                json.dump(siteinfo_disk, outfd, indent=4)

            # update the email-settings in the siteinfo dict and the
            # email-settings file next
            self.siteinfo.update(emailupdatedict)

            email_settings_file = os.path.join(
                self.basedir,
                self.siteinfo['email_settings_file'])

            if not os.path.exists(email_settings_file):

                LOGGER.error('no email settings file found '
                             'at expected path indicated '
                             'in site-info.json. Making a new one...')

                with open(email_settings_file, 'w') as outfd:
                    json.dump(emailupdatedict, outfd, indent=4)

            else:

                # make sure we can write to the email settings file
                os.chmod(email_settings_file, 0o100600)

                with open(email_settings_file, 'r') as infd:
                    emailsettings_disk = json.load(infd)

                emailsettings_disk.update(emailupdatedict)

                LOGGER.warning('updating email settings file '
                               'from admin-email-update-form')

                with open(email_settings_file, 'w') as outfd:
                    json.dump(emailsettings_disk, outfd, indent=4)

                # set email settings file permissions back to readonly
                os.chmod(email_settings_file, 0o100400)

            updatedict.update(emailupdatedict)

            returndict = {
                'status': 'ok',
                'message': ('Email and user sign-up/sign-in '
                            'settings successfully updated.'),
                'result': updatedict
            }
            self.write(returndict)
            self.finish()

        except Exception:

            LOGGER.exception('failed to update site-info.json')

            self.set_status(400)
            retdict = {
                'status': 'failed',
                'result': None,
                'message': ("Invalid input provided for "
                            "email-settings form.")
            }
            self.write(retdict)
            raise tornado.web.Finish()

    # anything else is probably the locked user, turn them away
    else:

        self.set_status(403)
        retdict = {
            'status': 'failed',
            'result': None,
            'message': ("Sorry, you don't have access. "
                        "API keys are not allowed for this endpoint.")
        }
        self.write(retdict)
        raise tornado.web.Finish()
def apply_epd_magseries(lcfile,
                        timecol,
                        magcol,
                        errcol,
                        externalparams,
                        lcformat='hat-sql',
                        lcformatdir=None,
                        epdsmooth_sigclip=3.0,
                        epdsmooth_windowsize=21,
                        epdsmooth_func=smooth_magseries_savgol,
                        epdsmooth_extraparams=None):
    '''This applies external parameter decorrelation (EPD) to a light curve.

    Parameters
    ----------

    lcfile : str
        The filename of the light curve file to process.

    timecol,magcol,errcol : str
        The keys in the lcdict produced by your light curve reader function
        that correspond to the times, mags/fluxes, and associated measurement
        errors that will be used as input to the EPD process.

    externalparams : dict or None
        This is a dict that indicates which keys in the lcdict obtained from
        the lcfile correspond to the required external parameters. As with
        timecol, magcol, and errcol, these can be simple keys (e.g. 'rjd') or
        compound keys ('magaperture1.mags'). The dict should look something
        like::

          {'fsv':'<lcdict key>' array: S values for each observation,
           'fdv':'<lcdict key>' array: D values for each observation,
           'fkv':'<lcdict key>' array: K values for each observation,
           'xcc':'<lcdict key>' array: x coords for each observation,
           'ycc':'<lcdict key>' array: y coords for each observation,
           'bgv':'<lcdict key>' array: sky background for each observation,
           'bge':'<lcdict key>' array: sky background err for each observation,
           'iha':'<lcdict key>' array: hour angle for each observation,
           'izd':'<lcdict key>' array: zenith distance for each observation}

        Alternatively, if these exact keys are already present in the lcdict,
        indicate this by setting externalparams to None.

    lcformat : str
        This is the `formatkey` associated with your light curve format,
        which you previously passed in to the `lcproc.register_lcformat`
        function. This will be used to look up how to find and read the light
        curves specified in `basedir` or `use_list_of_filenames`.

    lcformatdir : str or None
        If this is provided, gives the path to a directory when you've stored
        your lcformat description JSONs, other than the usual directories
        lcproc knows to search for them in. Use this along with `lcformat` to
        specify an LC format JSON file that's not currently registered with
        lcproc.

    epdsmooth_sigclip : float or int or sequence of two floats/ints or None
        This specifies how to sigma-clip the input LC before fitting the EPD
        function to it.

        If a single float or int, a symmetric sigma-clip will be performed
        using the number provided as the sigma-multiplier to cut out from the
        input time-series.

        If a list of two ints/floats is provided, the function will perform
        an 'asymmetric' sigma-clip. The first element in this list is the
        sigma value to use for fainter flux/mag values; the second element in
        this list is the sigma value to use for brighter flux/mag values. For
        example, `sigclip=[10., 3.]`, will sigclip out greater than 10-sigma
        dimmings and greater than 3-sigma brightenings. Here the meaning of
        "dimming" and "brightening" is set by *physics* (not the magnitude
        system), which is why the `magsarefluxes` kwarg must be correctly
        set.

        If `sigclip` is None, no sigma-clipping will be performed, and the
        time-series (with non-finite elems removed) will be passed through to
        the output.

    epdsmooth_windowsize : int
        This is the number of LC points to smooth over to generate a smoothed
        light curve that will be used to fit the EPD function.

    epdsmooth_func : Python function
        This sets the smoothing filter function to use. A Savitzky-Golay
        filter is used to smooth the light curve by default. The functions
        that can be used with this kwarg are listed in `varbase.trends`. If
        you want to use your own function, it MUST have the following
        signature::

          def smoothfunc(mags_array, window_size, **extraparams)

        and return a numpy array of the same size as `mags_array` with the
        smoothed time-series. Any extra params can be provided using the
        `extraparams` dict.

    epdsmooth_extraparams : dict
        This is a dict of any extra filter params to supply to the smoothing
        function.

    Returns
    -------

    str
        Writes the output EPD light curve to a pickle that contains the
        lcdict with an added `lcdict['epd']` key, which contains the EPD
        times, mags/fluxes, and errs as `lcdict['epd']['times']`,
        `lcdict['epd']['mags']`, and `lcdict['epd']['errs']`.

        Returns the filename of this generated EPD LC pickle file.

    Notes
    -----

    - S -> measure of PSF sharpness (~1/sigma^2; so smaller S = wider PSF)
    - D -> measure of PSF ellipticity in xy direction
    - K -> measure of PSF ellipticity in cross direction

    S, D, K are related to the PSF's variance and covariance, see eqn 30-33
    in A. Pal's thesis: https://arxiv.org/abs/0906.3486

    '''

    try:
        formatinfo = get_lcformat(lcformat,
                                  use_lcformat_dir=lcformatdir)
        if formatinfo:
            (dfileglob, readerfunc,
             dtimecols, dmagcols, derrcols,
             magsarefluxes, normfunc) = formatinfo
        else:
            LOGERROR("can't figure out the light curve format")
            return None
    except Exception:
        LOGEXCEPTION("can't figure out the light curve format")
        return None

    lcdict = readerfunc(lcfile)
    if ((isinstance(lcdict, (tuple, list))) and
            isinstance(lcdict[0], dict)):
        lcdict = lcdict[0]

    objectid = lcdict['objectid']
    times, mags, errs = lcdict[timecol], lcdict[magcol], lcdict[errcol]

    if externalparams is not None:

        fsv = lcdict[externalparams['fsv']]
        fdv = lcdict[externalparams['fdv']]
        fkv = lcdict[externalparams['fkv']]

        xcc = lcdict[externalparams['xcc']]
        ycc = lcdict[externalparams['ycc']]

        bgv = lcdict[externalparams['bgv']]
        bge = lcdict[externalparams['bge']]

        iha = lcdict[externalparams['iha']]
        izd = lcdict[externalparams['izd']]

    else:

        fsv = lcdict['fsv']
        fdv = lcdict['fdv']
        fkv = lcdict['fkv']

        xcc = lcdict['xcc']
        ycc = lcdict['ycc']

        bgv = lcdict['bgv']
        bge = lcdict['bge']

        iha = lcdict['iha']
        izd = lcdict['izd']

    # apply the corrections for EPD
    epd = epd_magseries(
        times, mags, errs,
        fsv, fdv, fkv, xcc, ycc, bgv, bge, iha, izd,
        magsarefluxes=magsarefluxes,
        epdsmooth_sigclip=epdsmooth_sigclip,
        epdsmooth_windowsize=epdsmooth_windowsize,
        epdsmooth_func=epdsmooth_func,
        epdsmooth_extraparams=epdsmooth_extraparams
    )

    # save the EPD magseries to a pickle LC
    lcdict['epd'] = epd
    outfile = os.path.join(
        os.path.dirname(lcfile),
        '%s-epd-%s-pklc.pkl' % (squeeze(objectid).replace(' ', '-'), magcol)
    )
    with open(outfile, 'wb') as outfd:
        pickle.dump(lcdict, outfd, protocol=pickle.HIGHEST_PROTOCOL)

    return outfile
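# A hypothetical call to apply_epd_magseries() above. This assumes the lcdict
# for the registered 'hat-sql' format stores the external parameters under
# the plain keys shown; the path and column names are illustrative only:
epd_pklc = apply_epd_magseries(
    '/data/lightcurves/HAT-123-0001234-lc.sqlite',
    'rjd', 'aep_000', 'aie_000',
    {'fsv': 'fsv', 'fdv': 'fdv', 'fkv': 'fkv',
     'xcc': 'xcc', 'ycc': 'ycc',
     'bgv': 'bgv', 'bge': 'bge',
     'iha': 'iha', 'izd': 'izd'},
    lcformat='hat-sql'
)
# epd_pklc -> '/data/lightcurves/HAT-123-0001234-epd-aep_000-pklc.pkl'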
def runpf(lcfile,
          outdir,
          timecols=None,
          magcols=None,
          errcols=None,
          lcformat='hat-sql',
          lcformatdir=None,
          pfmethods=('gls', 'pdm', 'mav', 'win'),
          pfkwargs=({}, {}, {}, {}),
          sigclip=10.0,
          getblssnr=False,
          nworkers=NCPUS,
          minobservations=500,
          excludeprocessed=False,
          raiseonfail=False):
    '''This runs the period-finding for a single LC.

    Parameters
    ----------

    lcfile : str
        The light curve file to run period-finding on.

    outdir : str
        The output directory where the result pickle will go.

    timecols : list of str or None
        The timecol keys to use from the lcdict in calculating the features.

    magcols : list of str or None
        The magcol keys to use from the lcdict in calculating the features.

    errcols : list of str or None
        The errcol keys to use from the lcdict in calculating the features.

    lcformat : str
        This is the `formatkey` associated with your light curve format,
        which you previously passed in to the `lcproc.register_lcformat`
        function. This will be used to look up how to find and read the light
        curves specified in `basedir` or `use_list_of_filenames`.

    lcformatdir : str or None
        If this is provided, gives the path to a directory when you've stored
        your lcformat description JSONs, other than the usual directories
        lcproc knows to search for them in. Use this along with `lcformat` to
        specify an LC format JSON file that's not currently registered with
        lcproc.

    pfmethods : list of str
        This is a list of period finding methods to run. Each element is a
        string matching the keys of the `PFMETHODS` dict above. By default,
        this runs GLS, PDM, AoVMH, and the spectral window Lomb-Scargle
        periodogram.

    pfkwargs : list of dicts
        This is used to provide any special kwargs as dicts to each
        period-finding method function specified in `pfmethods`.

    sigclip : float or int or sequence of two floats/ints or None
        If a single float or int, a symmetric sigma-clip will be performed
        using the number provided as the sigma-multiplier to cut out from the
        input time-series.

        If a list of two ints/floats is provided, the function will perform
        an 'asymmetric' sigma-clip. The first element in this list is the
        sigma value to use for fainter flux/mag values; the second element in
        this list is the sigma value to use for brighter flux/mag values. For
        example, `sigclip=[10., 3.]`, will sigclip out greater than 10-sigma
        dimmings and greater than 3-sigma brightenings. Here the meaning of
        "dimming" and "brightening" is set by *physics* (not the magnitude
        system), which is why the `magsarefluxes` kwarg must be correctly
        set.

        If `sigclip` is None, no sigma-clipping will be performed, and the
        time-series (with non-finite elems removed) will be passed through to
        the output.

    getblssnr : bool
        If this is True and BLS is one of the methods specified in
        `pfmethods`, will also calculate the stats for each best period in
        the BLS results: transit depth, duration, ingress duration, refit
        period and epoch, and the SNR of the transit.

    nworkers : int
        The number of parallel period-finding workers to launch.

    minobservations : int
        The minimum number of finite LC points required to process a light
        curve.

    excludeprocessed : bool
        If this is True, light curves that have existing period-finding
        result pickles in `outdir` will not be processed.

        FIXME: currently, this uses a dumb method of excluding
        already-processed files. A smarter way to do this is to (i) generate
        a SHA512 cachekey based on a repr of `{'lcfile', 'timecols',
        'magcols', 'errcols', 'lcformat', 'pfmethods', 'sigclip',
        'getblssnr', 'pfkwargs'}`, (ii) make sure all list kwargs in the dict
        are sorted, (iii) check if the output file has the same cachekey in
        its filename (last 8 chars of cachekey should work), so the result
        was processed in exactly the same way as specified in the input to
        this function, and can therefore be ignored. Will implement this
        later.

    raiseonfail : bool
        If something fails and this is True, will raise an Exception instead
        of returning None at the end.

    Returns
    -------

    str
        The path to the output period-finding result pickle.

    '''

    try:
        formatinfo = get_lcformat(lcformat,
                                  use_lcformat_dir=lcformatdir)
        if formatinfo:
            (dfileglob, readerfunc,
             dtimecols, dmagcols, derrcols,
             magsarefluxes, normfunc) = formatinfo
        else:
            LOGERROR("can't figure out the light curve format")
            return None
    except Exception:
        LOGEXCEPTION("can't figure out the light curve format")
        return None

    # override the default timecols, magcols, and errcols
    # using the ones provided to the function
    if timecols is None:
        timecols = dtimecols
    if magcols is None:
        magcols = dmagcols
    if errcols is None:
        errcols = derrcols

    try:

        # get the LC into a dict
        lcdict = readerfunc(lcfile)

        # this should handle lists/tuples being returned by readerfunc
        # we assume that the first element is the actual lcdict
        # FIXME: figure out how to not need this assumption
        if ((isinstance(lcdict, (list, tuple))) and
                (isinstance(lcdict[0], dict))):
            lcdict = lcdict[0]

        outfile = os.path.join(
            outdir,
            'periodfinding-%s.pkl' %
            squeeze(lcdict['objectid']).replace(' ', '-')
        )

        # if excludeprocessed is True, return the output file if it exists
        # and has a size that is at least 100 kilobytes (this should be
        # enough to contain the minimal results of this function).
        if excludeprocessed:

            test_outfile = os.path.exists(outfile)
            test_outfile_gz = os.path.exists(outfile + '.gz')

            if (test_outfile and os.stat(outfile).st_size > 102400):

                LOGWARNING('periodfinding result for %s already exists at '
                           '%s, skipping because excludeprocessed=True'
                           % (lcfile, outfile))
                return outfile

            elif (test_outfile_gz and
                  os.stat(outfile + '.gz').st_size > 102400):

                LOGWARNING(
                    'gzipped periodfinding result for %s already '
                    'exists at %s, skipping because excludeprocessed=True'
                    % (lcfile, outfile + '.gz')
                )
                return outfile + '.gz'

        # this is the final returndict
        resultdict = {
            'objectid': lcdict['objectid'],
            'lcfbasename': os.path.basename(lcfile),
            'kwargs': {'timecols': timecols,
                       'magcols': magcols,
                       'errcols': errcols,
                       'lcformat': lcformat,
                       'lcformatdir': lcformatdir,
                       'pfmethods': pfmethods,
                       'pfkwargs': pfkwargs,
                       'sigclip': sigclip,
                       'getblssnr': getblssnr}
        }

        # normalize using the special function if specified
        if normfunc is not None:
            lcdict = normfunc(lcdict)

        for tcol, mcol, ecol in zip(timecols, magcols, errcols):

            # dereference the columns and get them from the lcdict
            if '.' in tcol:
                tcolget = tcol.split('.')
            else:
                tcolget = [tcol]
            times = _dict_get(lcdict, tcolget)

            if '.' in mcol:
                mcolget = mcol.split('.')
            else:
                mcolget = [mcol]
            mags = _dict_get(lcdict, mcolget)

            if '.' in ecol:
                ecolget = ecol.split('.')
            else:
                ecolget = [ecol]
            errs = _dict_get(lcdict, ecolget)

            # normalize here if not using special normalization
            if normfunc is None:
                ntimes, nmags = normalize_magseries(
                    times, mags,
                    magsarefluxes=magsarefluxes
                )
                times, mags, errs = ntimes, nmags, errs

            # run each of the requested period-finder functions
            resultdict[mcol] = {}

            # check if we have enough non-nan observations to proceed
            finmags = mags[np.isfinite(mags)]

            if finmags.size < minobservations:
                LOGERROR('not enough non-nan observations for '
                         'this LC. have: %s, required: %s, '
                         'magcol: %s, skipping...' %
                         (finmags.size, minobservations, mcol))
                continue

            pfmkeys = []

            for pfmind, pfm, pfkw in zip(range(len(pfmethods)),
                                         pfmethods,
                                         pfkwargs):

                pf_func = PFMETHODS[pfm]

                # get any optional kwargs for this function
                pf_kwargs = pfkw
                pf_kwargs.update({'verbose': False,
                                  'nworkers': nworkers,
                                  'magsarefluxes': magsarefluxes,
                                  'sigclip': sigclip})

                # we'll always prefix things with their index to allow
                # multiple invocations and results from the same
                # period-finder (for different period ranges, for example).
                pfmkey = '%s-%s' % (pfmind, pfm)
                pfmkeys.append(pfmkey)

                # run this period-finder and save its results to the output
                # dict
                resultdict[mcol][pfmkey] = pf_func(
                    times, mags, errs,
                    **pf_kwargs
                )

            #
            # done with running the period finders
            #

            # append the pfmkeys list to the magcol dict
            resultdict[mcol]['pfmethods'] = pfmkeys

            # check if we need to get the SNR from any BLS pfresults
            if 'bls' in pfmethods and getblssnr:

                # we need to scan thru the pfmethods to get to any BLS
                # pfresults
                for pfmk in resultdict[mcol]['pfmethods']:

                    if 'bls' in pfmk:

                        try:

                            bls = resultdict[mcol][pfmk]

                            # calculate the SNR for the BLS as well
                            blssnr = bls_snr(bls, times, mags, errs,
                                             magsarefluxes=magsarefluxes,
                                             verbose=False)

                            # add the SNR results to the BLS result dict
                            resultdict[mcol][pfmk].update({
                                'snr': blssnr['snr'],
                                'transitdepth': blssnr['transitdepth'],
                                'transitduration': blssnr['transitduration'],
                            })

                            # update the BLS result dict with the refit
                            # periods and epochs using the results from
                            # bls_snr
                            resultdict[mcol][pfmk].update({
                                'nbestperiods': blssnr['period'],
                                'epochs': blssnr['epoch']
                            })

                        except Exception:

                            LOGEXCEPTION('could not calculate BLS SNR for %s'
                                         % lcfile)
                            # add the SNR null results to the BLS result dict
                            resultdict[mcol][pfmk].update({
                                'snr': [np.nan, np.nan, np.nan,
                                        np.nan, np.nan],
                                'transitdepth': [np.nan, np.nan, np.nan,
                                                 np.nan, np.nan],
                                'transitduration': [np.nan, np.nan, np.nan,
                                                    np.nan, np.nan],
                            })

            elif 'bls' in pfmethods:

                # we need to scan thru the pfmethods to get to any BLS
                # pfresults
                for pfmk in resultdict[mcol]['pfmethods']:

                    if 'bls' in pfmk:

                        # add the SNR null results to the BLS result dict
                        resultdict[mcol][pfmk].update({
                            'snr': [np.nan, np.nan, np.nan, np.nan, np.nan],
                            'transitdepth': [np.nan, np.nan, np.nan,
                                             np.nan, np.nan],
                            'transitduration': [np.nan, np.nan, np.nan,
                                                np.nan, np.nan],
                        })

        # once all mag cols have been processed, write out the pickle
        with open(outfile, 'wb') as outfd:
            pickle.dump(resultdict, outfd, protocol=pickle.HIGHEST_PROTOCOL)

        return outfile

    except Exception as e:

        LOGEXCEPTION('failed to run for %s, because: %s' % (lcfile, e))
        if raiseonfail:
            raise
        return None
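# A hypothetical runpf() call, assuming 'gls', 'pdm', and 'bls' are keys in
# the module-level PFMETHODS registry and that the BLS function accepts
# startp/endp kwargs (paths and kwargs are illustrative only):
pfresult = runpf(
    '/data/lightcurves/HAT-123-0001234-lc.sqlite',
    '/data/pfresults',
    pfmethods=('gls', 'pdm', 'bls'),
    pfkwargs=({}, {}, {'startp': 0.5, 'endp': 100.0}),
    sigclip=10.0,
    getblssnr=True
)
# pfresult -> '/data/pfresults/periodfinding-HAT-123-0001234.pkl'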
def test_squeeze(self):
    # the input string contains runs of whitespace that squeeze() collapses
    self.assertEqual(squeeze(u('sequences     of    whitespace   chars')),
                     u('sequences of whitespace chars'))
def post(self):
    '''This handles the POST request to /users/new.

    '''

    if not self.current_user:
        self.redirect('/')
        return

    if ((not self.keycheck['status'] == 'ok') or
        (not self.xsrf_type == 'session')):

        self.set_status(403)
        retdict = {
            'status': 'failed',
            'result': None,
            'message': ("Sorry, you don't have access. "
                        "API keys are not allowed for this endpoint.")
        }
        self.write(retdict)
        raise tornado.web.Finish()

    current_user = self.current_user

    # get the provided email and password
    try:
        username = xhtml_escape(self.get_argument('username'))
        email = xhtml_escape(self.get_argument('email'))
        password = self.get_argument('password')
    except Exception:
        LOGGER.error('username, email, and password are all required.')
        self.save_flash_messages(
            "A user name, email address, and "
            "strong password are all required.",
            "warning"
        )
        self.redirect('/users/new')
        return

    # check if this email address is allowed to sign up for an account
    if ('allowed_user_emailaddr' in self.siteinfo and
        len(self.siteinfo['allowed_user_emailaddr']) > 0):

        if (squeeze(email.lower().strip()) not in
            self.siteinfo['allowed_user_emailaddr']):

            LOGGER.error("Email: %s is not allowed to sign up." % email)
            self.save_flash_messages(
                "Sorry, the email address you entered wasn't found in "
                "the list of people allowed to "
                "sign up for an account here.",
                "danger"
            )
            self.redirect('/users/new')
            raise tornado.web.Finish()

    # talk to the authnzerver to sign this user up
    ok, resp, msgs = yield self.authnzerver_request(
        'user-new',
        {'session_token': current_user['session_token'],
         'username': username,
         'email': squeeze(email.lower().strip()),
         'password': password}
    )

    # FIXME: don't generate a new session token here yet
    # # generate a new anon session token in any case
    # new_session = yield self.new_session_token(
    #     user_id=2,
    #     expires_days=self.session_expiry,
    # )

    # if the sign up request is successful, send the email
    if ok:

        #
        # send the background request to authnzerver to send an email
        #

        # get the email info from site-info.json
        smtp_sender = self.siteinfo['email_sender']
        smtp_user = self.siteinfo['email_user']
        smtp_pass = self.siteinfo['email_pass']
        smtp_server = self.siteinfo['email_server']
        smtp_port = self.siteinfo['email_port']

        # generate a fernet verification token that is timestamped. we'll
        # give it 15 minutes to expire and decrypt it using:
        # self.ferneter.decrypt(token, ttl=15*60)
        fernet_verification_token = self.ferneter.encrypt(
            secrets.token_urlsafe(32).encode()
        )

        # get this server's base URL
        if self.request.headers.get('X-Real-Host'):
            server_baseurl = '%s://%s' % (
                self.request.headers.get('X-Forwarded-Proto'),
                self.request.headers.get('X-Real-Host')
            )
        else:
            server_baseurl = '%s://%s' % (self.request.protocol,
                                          self.request.host)

        ok, resp, msgs = yield self.authnzerver_request(
            'user-signup-email',
            {'email_address': email,
             'server_baseurl': server_baseurl,
             'server_id': 'HSC viz-inspect',
             'session_token': current_user['session_token'],
             'smtp_server': smtp_server,
             'smtp_sender': smtp_sender,
             'smtp_user': smtp_user,
             'smtp_pass': smtp_pass,
             'smtp_port': smtp_port,
             'fernet_verification_token': fernet_verification_token,
             'created_info': resp}
        )

        if ok:
            self.save_flash_messages(
                "Thanks for signing up! We've sent a verification "
                "request to your email address. "
                "Please complete user registration by "
                "entering the code you received.",
                "primary"
            )
            self.redirect('/users/verify')

        # FIXME: if the backend breaks here, the user is left in limbo
        # what to do?
        else:
            LOGGER.error('failed to send an email: %r' % msgs)
            self.save_flash_messages(msgs, 'warning')
            self.redirect('/users/new')

    # if the sign up request fails, tell the user what happened
    else:
        LOGGER.error("Could not complete sign up request: %r" % msgs)
        self.save_flash_messages(" ".join(msgs), "danger")
        self.redirect('/users/new')
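The signup flow above leans on Fernet's timestamped tokens for the 15-minute expiry. A standalone sketch of the same idea (the key here is freshly generated, not the server's real one):

import secrets
from cryptography.fernet import Fernet, InvalidToken

ferneter = Fernet(Fernet.generate_key())

# encrypt a random URL-safe token; Fernet embeds the creation time
token = ferneter.encrypt(secrets.token_urlsafe(32).encode())

# decryption fails with InvalidToken once more than ttl seconds have passed
try:
    original = ferneter.decrypt(token, ttl=15 * 60)
except InvalidToken:
    original = None  # expired or tampered with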
def get_arxiv_articles(paperlinks, paperdata, crosslinks, crossdata):

    paperdict = {}
    crossdict = {}

    for ind, link, data in zip(range(len(paperlinks)), paperlinks, paperdata):

        paper_abstract = squeeze(data.p.text.replace('\n', ' ').strip())
        paper_title = squeeze(
            data.find_all('div', class_='list-title')[0].text.strip('\n').replace(
                'Title:', '', 1))
        paper_authors = (data.find_all(
            'div', class_='list-authors')[0].text.strip('\n').replace(
                'Authors:', '', 1))
        paper_authors = [
            squeeze(x.lstrip('\n').rstrip('\n'))
            for x in paper_authors.split(', ')
        ]

        paper_links = link.find_all('a')[1:3]
        paper_link, arxiv_id = paper_links[0]['href'], paper_links[0].text
        paper_pdf = paper_links[1]['href']

        try:
            comment_contents = data.find('div',
                                         class_='list-comments').contents[2:]
            paper_comments = squeeze(' '.join([
                str(x).lstrip('\n').rstrip('\n') for x in comment_contents
            ]).strip())

            # handle internal arxiv links correctly
            if '<a href="/abs' in paper_comments:
                paper_comments = paper_comments.replace(
                    '/abs', 'https://arxiv.org/abs')

        except AttributeError:
            paper_comments = ''

        paperdict[ind + 1] = {
            'authors': paper_authors,
            'title': paper_title,
            'abstract': paper_abstract,
            'comments': paper_comments,
            'arxiv': arxiv_id,
            'link': paper_link,
            'pdf': paper_pdf
        }

    for ind, link, data in zip(range(len(crosslinks)), crosslinks, crossdata):

        cross_abstract = squeeze(data.p.text.replace('\n', ' ').strip())
        cross_title = squeeze(
            data.find_all('div', class_='list-title')[0].text.strip('\n').replace(
                'Title:', '', 1))
        cross_authors = (data.find_all(
            'div', class_='list-authors')[0].text.strip('\n').replace(
                'Authors:', '', 1))
        cross_authors = [
            squeeze(x.lstrip('\n').rstrip('\n'))
            for x in cross_authors.split(', ')
        ]

        cross_links = link.find_all('a')[1:3]
        cross_link, arxiv_id = cross_links[0]['href'], cross_links[0].text
        cross_pdf = cross_links[1]['href']

        # figure out which original arxiv this came from
        try:
            cltext = link.text
            cltext_xlind_start = cltext.index('cross-list')
            cltext_xlind_end = cltext.index('[pdf') - 2
            # annotate the title with the original arxiv category
            cltext = cltext[cltext_xlind_start:cltext_xlind_end]
            cross_title = u'[%s] %s' % (cltext, cross_title)
        # if the cross-list doesn't say where it came from, just add a
        # [cross-list] annotation
        except Exception:
            cross_title = u'[cross-list] %s' % cross_title

        try:
            comment_contents = data.find('div',
                                         class_='list-comments').contents[2:]
            cross_comments = squeeze(' '.join([
                str(x).lstrip('\n').rstrip('\n') for x in comment_contents
            ]).strip())

            # handle internal arxiv links correctly
            if '<a href="/abs' in cross_comments:
                cross_comments = cross_comments.replace(
                    '/abs', 'https://arxiv.org/abs')

        except AttributeError:
            cross_comments = ''

        crossdict[ind + 1] = {
            'authors': cross_authors,
            'title': cross_title,
            'abstract': cross_abstract,
            'comments': cross_comments,
            'arxiv': arxiv_id,
            'link': cross_link,
            'pdf': cross_pdf
        }

    return paperdict, crossdict
def get_varfeatures(lcfile,
                    outdir,
                    timecols=None,
                    magcols=None,
                    errcols=None,
                    mindet=1000,
                    lcformat='hat-sql',
                    lcformatdir=None):
    '''This runs
    :py:func:`astrobase.varclass.varfeatures.all_nonperiodic_features` on a
    single LC file.

    Parameters
    ----------

    lcfile : str
        The input light curve to process.

    outdir : str
        The directory where the output variable features pickle will be
        written.

    timecols : list of str or None
        The timecol keys to use from the lcdict in calculating the features.

    magcols : list of str or None
        The magcol keys to use from the lcdict in calculating the features.

    errcols : list of str or None
        The errcol keys to use from the lcdict in calculating the features.

    mindet : int
        The minimum number of LC points required to generate variability
        features.

    lcformat : str
        This is the `formatkey` associated with your light curve format, which
        you previously passed in to the `lcproc.register_lcformat`
        function. This will be used to look up how to find and read the light
        curves specified in `basedir` or `use_list_of_filenames`.

    lcformatdir : str or None
        If this is provided, gives the path to a directory where you've stored
        your lcformat description JSONs, other than the usual directories
        lcproc knows to search for them in. Use this along with `lcformat` to
        specify an LC format JSON file that's not currently registered with
        lcproc.

    Returns
    -------

    str
        The generated variability features pickle for the input LC, with
        results for each magcol in the input `magcol` or light curve format's
        default `magcol` list.

    '''

    try:
        formatinfo = get_lcformat(lcformat,
                                  use_lcformat_dir=lcformatdir)
        if formatinfo:
            (dfileglob, readerfunc,
             dtimecols, dmagcols, derrcols,
             magsarefluxes, normfunc) = formatinfo
        else:
            LOGERROR("can't figure out the light curve format")
            return None
    except Exception:
        LOGEXCEPTION("can't figure out the light curve format")
        return None

    # override the default timecols, magcols, and errcols
    # using the ones provided to the function
    if timecols is None:
        timecols = dtimecols
    if magcols is None:
        magcols = dmagcols
    if errcols is None:
        errcols = derrcols

    try:

        # get the LC into a dict
        lcdict = readerfunc(lcfile)

        # this should handle lists/tuples being returned by readerfunc
        # we assume that the first element is the actual lcdict
        # FIXME: figure out how to not need this assumption
        if ((isinstance(lcdict, (list, tuple))) and
                (isinstance(lcdict[0], dict))):
            lcdict = lcdict[0]

        resultdict = {'objectid': lcdict['objectid'],
                      'info': lcdict['objectinfo'],
                      'lcfbasename': os.path.basename(lcfile)}

        # normalize using the special function if specified
        if normfunc is not None:
            lcdict = normfunc(lcdict)

        for tcol, mcol, ecol in zip(timecols, magcols, errcols):

            # dereference the columns and get them from the lcdict
            if '.' in tcol:
                tcolget = tcol.split('.')
            else:
                tcolget = [tcol]
            times = _dict_get(lcdict, tcolget)

            if '.' in mcol:
                mcolget = mcol.split('.')
            else:
                mcolget = [mcol]
            mags = _dict_get(lcdict, mcolget)

            if '.' in ecol:
                ecolget = ecol.split('.')
            else:
                ecolget = [ecol]
            errs = _dict_get(lcdict, ecolget)

            # normalize here if not using special normalization
            if normfunc is None:
                ntimes, nmags = normalize_magseries(
                    times, mags,
                    magsarefluxes=magsarefluxes)
                times, mags, errs = ntimes, nmags, errs

            # make sure we have finite values
            finind = (np.isfinite(times) &
                      np.isfinite(mags) &
                      np.isfinite(errs))

            # make sure we have enough finite values
            if mags[finind].size < mindet:

                LOGINFO('not enough LC points: %s in normalized %s LC: %s' %
                        (mags[finind].size, mcol, os.path.basename(lcfile)))
                resultdict[mcol] = None

            else:

                # get the features for this magcol
                lcfeatures = varfeatures.all_nonperiodic_features(
                    times, mags, errs)
                resultdict[mcol] = lcfeatures

        # now that we've collected all the magcols, we can choose which is the
        # "best" magcol. this is defined as the magcol that gives us the
        # smallest LC MAD.
        try:
            magmads = np.zeros(len(magcols))
            for mind, mcol in enumerate(magcols):
                if '.' in mcol:
                    mcolget = mcol.split('.')
                else:
                    mcolget = [mcol]
                magmads[mind] = resultdict[mcol]['mad']

            # index of the smallest MAD
            bestmagcolind = np.argmin(magmads)
            resultdict['bestmagcol'] = magcols[bestmagcolind]

        except Exception:
            resultdict['bestmagcol'] = None

        outfile = os.path.join(
            outdir,
            'varfeatures-%s.pkl' %
            squeeze(resultdict['objectid']).replace(' ', '-'))

        with open(outfile, 'wb') as outfd:
            pickle.dump(resultdict, outfd, protocol=4)

        return outfile

    except Exception as e:
        LOGEXCEPTION('failed to get LC features for %s because: %s' %
                     (os.path.basename(lcfile), e))
        return None
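A hypothetical call to `get_varfeatures` (the paths are placeholders, and 'hat-sql' stands in for whatever lcformat key was registered with `lcproc.register_lcformat`):

import pickle

featurefile = get_varfeatures(
    '/data/lcs/HAT-579-0001234-lc.sqlite',  # placeholder LC path
    '/data/varfeatures',
    mindet=500,           # relax the minimum finite-point cut
    lcformat='hat-sql',
)
if featurefile is not None:
    with open(featurefile, 'rb') as infd:
        features = pickle.load(infd)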
def validate_input_password(email,
                            password,
                            min_length=12,
                            max_match_threshold=50):
    '''This validates user input passwords.

    1. must be at least min_length characters (we'll truncate the password at
       1024 characters since we don't want to store entire novels)
    2. must not match within max_match_threshold of their email
    3. must not match within max_match_threshold of the site's FQDN
    4. must not have a single case-folded character take up more than 20% of
       the length of the password
    5. must not be completely numeric

    '''

    messages = []

    # we'll ignore any repeated white space and fail immediately if the
    # password is all white space
    if len(squeeze(password.strip())) < min_length:

        LOGGER.warning('password for new account: %s is too short' % email)
        messages.append('Your password is too short. '
                        'It must have at least %s characters.' % min_length)
        passlen_ok = False

    else:
        passlen_ok = True

    # check the fuzzy match against the FQDN and email address
    fqdn = socket.getfqdn()
    fqdn_match = UQRatio(password, fqdn)
    email_match = UQRatio(password, email)

    fqdn_ok = fqdn_match < max_match_threshold
    email_ok = email_match < max_match_threshold

    if not fqdn_ok or not email_ok:

        LOGGER.warning('password for new account: %s matches FQDN '
                       '(similarity: %s) or their email address '
                       '(similarity: %s)' % (email, fqdn_match, email_match))
        messages.append('Your password is too similar to either '
                        'the domain name of this LCC-Server or your '
                        'own email address.')

    # next, check if the password is complex enough
    histogram = {}
    for char in password:
        if char.lower() not in histogram:
            histogram[char.lower()] = 1
        else:
            histogram[char.lower()] = histogram[char.lower()] + 1

    hist_ok = True

    for h in histogram:
        if (histogram[h] / len(password)) > 0.2:
            hist_ok = False
            LOGGER.warning('one character is more than '
                           '0.2 x length of the password')
            messages.append(
                'Your password is not complex enough. '
                'One or more characters appear too frequently.'
            )
            break

    # check if the password is all numeric
    if password.isdigit():
        numeric_ok = False
        messages.append('Your password cannot be all numbers.')
    else:
        numeric_ok = True

    return (
        (passlen_ok and email_ok and fqdn_ok and hist_ok and numeric_ok),
        messages
    )
def timebinlc(lcfile,
              binsizesec,
              outdir=None,
              lcformat='hat-sql',
              lcformatdir=None,
              timecols=None,
              magcols=None,
              errcols=None,
              minbinelems=7):
    '''This bins the given light curve file in time using the specified bin
    size.

    Parameters
    ----------

    lcfile : str
        The file name to process.

    binsizesec : float
        The time bin-size in seconds.

    outdir : str or None
        If this is a str, the output LC will be written to `outdir`. If this
        is None, the output LC will be written to the same directory as
        `lcfile`.

    lcformat : str
        This is the `formatkey` associated with your light curve format, which
        you previously passed in to the `lcproc.register_lcformat`
        function. This will be used to look up how to find and read the light
        curve file.

    lcformatdir : str or None
        If this is provided, gives the path to a directory where you've stored
        your lcformat description JSONs, other than the usual directories
        lcproc knows to search for them in. Use this along with `lcformat` to
        specify an LC format JSON file that's not currently registered with
        lcproc.

    timecols,magcols,errcols : lists of str
        The keys in the lcdict produced by your light curve reader function
        that correspond to the times, mags/fluxes, and associated measurement
        errors that will be used as inputs to the binning process. If these
        are None, the default values for `timecols`, `magcols`, and `errcols`
        for your light curve format will be used here.

    minbinelems : int
        The minimum number of time-bin elements required to accept a time-bin
        as valid for the output binned light curve.

    Returns
    -------

    str
        The name of the output pickle file with the binned LC.

        Writes the output binned light curve to a pickle that contains the
        lcdict with an added `lcdict['binned'][magcol]` key, which contains
        the binned times, mags/fluxes, and errs as
        `lcdict['binned'][magcol]['times']`,
        `lcdict['binned'][magcol]['mags']`, and
        `lcdict['binned'][magcol]['errs']` for each `magcol` provided in the
        input or default `magcols` value for this light curve format.

    '''

    try:
        formatinfo = get_lcformat(lcformat,
                                  use_lcformat_dir=lcformatdir)
        if formatinfo:
            (dfileglob, readerfunc,
             dtimecols, dmagcols, derrcols,
             magsarefluxes, normfunc) = formatinfo
        else:
            LOGERROR("can't figure out the light curve format")
            return None
    except Exception:
        LOGEXCEPTION("can't figure out the light curve format")
        return None

    # override the default timecols, magcols, and errcols
    # using the ones provided to the function
    if timecols is None:
        timecols = dtimecols
    if magcols is None:
        magcols = dmagcols
    if errcols is None:
        errcols = derrcols

    # get the LC into a dict
    lcdict = readerfunc(lcfile)

    # this should handle lists/tuples being returned by readerfunc
    # we assume that the first element is the actual lcdict
    # FIXME: figure out how to not need this assumption
    if ((isinstance(lcdict, (list, tuple))) and
            (isinstance(lcdict[0], dict))):
        lcdict = lcdict[0]

    # skip already binned light curves
    if 'binned' in lcdict:
        LOGERROR('this light curve appears to be binned already, skipping...')
        return None

    lcdict['binned'] = {}

    for tcol, mcol, ecol in zip(timecols, magcols, errcols):

        # dereference the columns and get them from the lcdict
        if '.' in tcol:
            tcolget = tcol.split('.')
        else:
            tcolget = [tcol]
        times = _dict_get(lcdict, tcolget)

        if '.' in mcol:
            mcolget = mcol.split('.')
        else:
            mcolget = [mcol]
        mags = _dict_get(lcdict, mcolget)

        if '.' in ecol:
            ecolget = ecol.split('.')
        else:
            ecolget = [ecol]
        errs = _dict_get(lcdict, ecolget)

        # normalize here if not using special normalization
        if normfunc is None:
            ntimes, nmags = normalize_magseries(
                times, mags,
                magsarefluxes=magsarefluxes)
            times, mags, errs = ntimes, nmags, errs

        # now bin the mag series as requested
        binned = time_bin_magseries_with_errs(times,
                                              mags,
                                              errs,
                                              binsize=binsizesec,
                                              minbinelems=minbinelems)

        # put this into the special binned key of the lcdict
        lcdict['binned'][mcol] = {'times': binned['binnedtimes'],
                                  'mags': binned['binnedmags'],
                                  'errs': binned['binnederrs'],
                                  'nbins': binned['nbins'],
                                  'timebins': binned['jdbins'],
                                  'binsizesec': binsizesec}

    # done with binning for all magcols, now generate the output file
    # this will always be a pickle
    if outdir is None:
        outdir = os.path.dirname(lcfile)

    outfile = os.path.join(outdir, '%s-binned%.1fsec-%s.pkl' %
                           (squeeze(lcdict['objectid']).replace(' ', '-'),
                            binsizesec, lcformat))

    with open(outfile, 'wb') as outfd:
        pickle.dump(lcdict, outfd, protocol=pickle.HIGHEST_PROTOCOL)

    return outfile
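Usage is similar to `get_varfeatures` above; a sketch with placeholder paths, binning to 5-minute bins:

binned_pkl = timebinlc(
    '/data/lcs/HAT-579-0001234-lc.sqlite',  # placeholder LC path
    300.0,                 # bin size in seconds
    outdir='/data/binned-lcs',
    minbinelems=7,
)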
def doc_render_worker(docpage,
                      basedir,
                      serverindex,
                      siteindex):
    '''This is a worker that renders Markdown to HTML markup. Works in a
    background Executor.

    serverindex and siteindex are the dicts containing server and site doc
    page titles and doc page names.

    '''

    # check for shady doc pages
    if '.' in docpage:
        return None, None
    if '/' in docpage:
        return None, None
    if len(docpage) != len(squeeze(docpage).strip().replace(' ', '')):
        return None, None

    # find the doc page requested
    if docpage in serverindex:
        page_title = serverindex[docpage]
        doc_md_file = os.path.join(os.path.dirname(__file__),
                                   '..',
                                   'server-docs',
                                   '%s.md' % docpage)
    elif docpage in siteindex:
        page_title = siteindex[docpage]
        doc_md_file = os.path.join(basedir, 'docs', '%s.md' % docpage)

    # if the doc page is not found in either index, then it doesn't exist
    else:
        return None, None

    # check for some more shenanigans
    if not os.path.exists(doc_md_file):
        return None, None

    doc_md_dir_abspath = os.path.dirname(os.path.abspath(doc_md_file))

    if docpage in serverindex:
        doc_dir_abspath = os.path.abspath(
            os.path.join(os.path.dirname(__file__), '..', 'server-docs')
        )
    elif docpage in siteindex:
        doc_dir_abspath = os.path.abspath(os.path.join(basedir, 'docs'))

    if doc_md_dir_abspath != doc_dir_abspath:
        return None, None

    # we'll open in 'r' mode since we want unicode for markdown
    with open(doc_md_file, 'r') as infd:
        doc_markdown = infd.read()

    LOGGER.info('read %s for requested docs page: %s...' %
                (doc_md_file, docpage))

    # render the markdown to HTML
    doc_html = markdown.markdown(
        doc_markdown,
        output_format='html5',
        extensions=['markdown.extensions.extra',
                    'markdown.extensions.codehilite',
                    'markdown.extensions.toc',
                    'markdown.extensions.tables'],
        extension_configs={
            'markdown.extensions.codehilite': {
                'guess_lang': False
            },
            'markdown.extensions.toc': {
                'anchorlink': True
            },
        }
    )

    return doc_html, page_title
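The third guard above rejects any page name that smuggles in whitespace: if squeezing, stripping, and removing spaces changes the length, some whitespace was present. The same check in isolation:

from tornado.escape import squeeze

def has_whitespace(name):
    # True if collapsing and removing whitespace changes the length
    return len(name) != len(squeeze(name).strip().replace(' ', ''))

assert has_whitespace('install guide')        # rejected
assert has_whitespace('install\tguide')       # rejected
assert not has_whitespace('install-guide')    # accepted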
def get_starfeatures(lcfile,
                     outdir,
                     kdtree,
                     objlist,
                     lcflist,
                     neighbor_radius_arcsec,
                     deredden=True,
                     custom_bandpasses=None,
                     lcformat='hat-sql',
                     lcformatdir=None):
    '''This runs the functions from :py:func:`astrobase.varclass.starfeatures`
    on a single light curve file.

    Parameters
    ----------

    lcfile : str
        This is the LC file to extract star features for.

    outdir : str
        This is the directory to write the output pickle to.

    kdtree : scipy.spatial.cKDTree
        This is a `scipy.spatial.KDTree` or `cKDTree` used to calculate
        neighbor proximity features. This is for the light curve catalog this
        object is in.

    objlist : np.array
        This is a Numpy array of object IDs in the same order as the
        `kdtree.data` np.array. This is for the light curve catalog this
        object is in.

    lcflist : np.array
        This is a Numpy array of light curve filenames in the same order as
        `kdtree.data`. This is for the light curve catalog this object is in.

    neighbor_radius_arcsec : float
        This indicates the radius in arcsec to search for neighbors for this
        object using the light curve catalog's `kdtree`, `objlist`, `lcflist`,
        and in GAIA.

    deredden : bool
        This controls if the colors and any color classifications will be
        dereddened using 2MASS DUST.

    custom_bandpasses : dict or None
        This is a dict used to define any custom bandpasses in the
        `in_objectinfo` dict you want to make this function aware of and
        generate colors for. Use the format below for this dict::

            {
            '<bandpass_key_1>':{'dustkey':'<twomass_dust_key_1>',
                                'label':'<band_label_1>',
                                'colors':[['<bandkey1>-<bandkey2>',
                                           '<BAND1> - <BAND2>'],
                                          ['<bandkey3>-<bandkey4>',
                                           '<BAND3> - <BAND4>']]},
            .
            ...
            .
            '<bandpass_key_N>':{'dustkey':'<twomass_dust_key_N>',
                                'label':'<band_label_N>',
                                'colors':[['<bandkey1>-<bandkey2>',
                                           '<BAND1> - <BAND2>'],
                                          ['<bandkey3>-<bandkey4>',
                                           '<BAND3> - <BAND4>']]},
            }

        Where:

        `bandpass_key` is a key to use to refer to this bandpass in the
        `objectinfo` dict, e.g. 'sdssg' for SDSS g band

        `twomass_dust_key` is the key to use in the 2MASS DUST result table
        for reddening per band-pass. For example, given the following DUST
        result table (using http://irsa.ipac.caltech.edu/applications/DUST/)::

            |Filter_name|LamEff |A_over_E_B_V_SandF|A_SandF|A_over_E_B_V_SFD|A_SFD|
            |char       |float  |float             |float  |float           |float|
            |           |microns|                  |mags   |                |mags |
             CTIO U      0.3734  4.107              0.209   4.968            0.253
             CTIO B      0.4309  3.641              0.186   4.325            0.221
             CTIO V      0.5517  2.682              0.137   3.240            0.165
             ...

        The `twomass_dust_key` for 'vmag' would be 'CTIO V'. If you want to
        skip DUST lookup and want to pass in a specific reddening magnitude
        for your bandpass, use a float for the value of `twomass_dust_key`. If
        you want to skip DUST lookup entirely for this bandpass, use None for
        the value of `twomass_dust_key`.

        `band_label` is the label to use for this bandpass, e.g. 'W1' for
        WISE-1 band, 'u' for SDSS u, etc.

        The 'colors' list contains color definitions for all colors you want
        to generate using this bandpass. this list contains elements of the
        form::

            ['<bandkey1>-<bandkey2>','<BAND1> - <BAND2>']

        where the first item is the bandpass keys making up this color, and
        the second item is the label for this color to be used by the
        frontends. An example::

            ['sdssu-sdssg','u - g']

    lcformat : str
        This is the `formatkey` associated with your light curve format, which
        you previously passed in to the `lcproc.register_lcformat`
        function. This will be used to look up how to find and read the light
        curves specified in `basedir` or `use_list_of_filenames`.

    lcformatdir : str or None
        If this is provided, gives the path to a directory where you've stored
        your lcformat description JSONs, other than the usual directories
        lcproc knows to search for them in. Use this along with `lcformat` to
        specify an LC format JSON file that's not currently registered with
        lcproc.

    Returns
    -------

    str
        Path to the output pickle containing all of the star features for this
        object.

    '''

    try:
        formatinfo = get_lcformat(lcformat,
                                  use_lcformat_dir=lcformatdir)
        if formatinfo:
            (dfileglob, readerfunc,
             dtimecols, dmagcols, derrcols,
             magsarefluxes, normfunc) = formatinfo
        else:
            LOGERROR("can't figure out the light curve format")
            return None
    except Exception:
        LOGEXCEPTION("can't figure out the light curve format")
        return None

    try:

        # get the LC into a dict
        lcdict = readerfunc(lcfile)

        # this should handle lists/tuples being returned by readerfunc
        # we assume that the first element is the actual lcdict
        # FIXME: figure out how to not need this assumption
        if ((isinstance(lcdict, (list, tuple))) and
                (isinstance(lcdict[0], dict))):
            lcdict = lcdict[0]

        resultdict = {'objectid': lcdict['objectid'],
                      'info': lcdict['objectinfo'],
                      'lcfbasename': os.path.basename(lcfile)}

        # run the coord features first
        coordfeat = starfeatures.coord_features(lcdict['objectinfo'])

        # next, run the color features
        colorfeat = starfeatures.color_features(
            lcdict['objectinfo'],
            deredden=deredden,
            custom_bandpasses=custom_bandpasses
        )

        # run a rough color classification
        colorclass = starfeatures.color_classification(colorfeat, coordfeat)

        # finally, run the neighbor features
        nbrfeat = starfeatures.neighbor_gaia_features(lcdict['objectinfo'],
                                                      kdtree,
                                                      neighbor_radius_arcsec)

        # get the objectids of the neighbors found if any
        if nbrfeat['nbrindices'].size > 0:
            nbrfeat['nbrobjectids'] = objlist[nbrfeat['nbrindices']]
            nbrfeat['closestnbrobjectid'] = objlist[
                nbrfeat['closestdistnbrind']
            ]
            nbrfeat['closestnbrlcfname'] = lcflist[
                nbrfeat['closestdistnbrind']
            ]
        else:
            nbrfeat['nbrobjectids'] = np.array([])
            nbrfeat['closestnbrobjectid'] = np.array([])
            nbrfeat['closestnbrlcfname'] = np.array([])

        # update the result dict
        resultdict.update(coordfeat)
        resultdict.update(colorfeat)
        resultdict.update(colorclass)
        resultdict.update(nbrfeat)

        outfile = os.path.join(
            outdir,
            'starfeatures-%s.pkl' %
            squeeze(resultdict['objectid']).replace(' ', '-'))

        with open(outfile, 'wb') as outfd:
            pickle.dump(resultdict, outfd, protocol=4)

        return outfile

    except Exception as e:
        LOGEXCEPTION('failed to get star features for %s because: %s' %
                     (os.path.basename(lcfile), e))
        return None
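A sketch of how this might be driven. The catalog arrays are toy placeholders, and the kdtree construction below (unit-sphere xyz coordinates) is an assumption about what `neighbor_gaia_features` expects; it should match however the LC catalog's kdtree was originally built:

import numpy as np
from scipy.spatial import cKDTree

# toy catalog: RA/decl in degrees, plus matching object IDs and LC paths
catalog_ras = np.array([120.01, 120.05, 121.20])
catalog_decls = np.array([-30.2, -30.3, -29.9])
catalog_objectids = np.array(['HAT-1', 'HAT-2', 'HAT-3'])
catalog_lcfiles = np.array(['/lcs/HAT-1.sqlite',
                            '/lcs/HAT-2.sqlite',
                            '/lcs/HAT-3.sqlite'])

ra_rad, decl_rad = np.radians(catalog_ras), np.radians(catalog_decls)
kdt = cKDTree(np.column_stack((np.cos(decl_rad) * np.cos(ra_rad),
                               np.cos(decl_rad) * np.sin(ra_rad),
                               np.sin(decl_rad))))

starfeat_pkl = get_starfeatures(
    '/lcs/HAT-1.sqlite', '/tmp/starfeatures',
    kdt, catalog_objectids, catalog_lcfiles,
    neighbor_radius_arcsec=30.0,
    lcformat='hat-sql',
)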
def tag_local_authors(arxiv_date,
                      database=None,
                      match_threshold=0.93,
                      update_db=False):
    '''This finds all local authors for all papers on the date arxiv_date and
    tags the rows for them in the DB.

    '''

    # open the database if needed and get a cursor
    if not database:
        database, cursor = opendb()
        closedb = True
    else:
        cursor = database.cursor()
        closedb = False

    try:

        # get all local authors first
        query = 'select author from local_authors'
        cursor.execute(query)
        rows = cursor.fetchall()

        if rows and len(rows) > 0:

            # zip objects aren't subscriptable on Python 3, so wrap in list()
            local_authors = list(list(zip(*rows))[0])
            local_authors = [x.lower() for x in local_authors]
            local_authors = [x.replace('.', ' ') for x in local_authors]
            local_authors = [squeeze(x) for x in local_authors]

            # this contains firstinitial-lastname pairs
            local_author_fnames = [x.split() for x in local_authors]
            local_author_fnames = [''.join([x[0][0], x[-1]])
                                   for x in local_author_fnames]

            local_authors = [x.replace(' ', '') for x in local_authors]

        else:
            local_authors = []

        if len(local_authors) > 0:

            # get all the authors for this date
            query = ('select arxiv_id, authors from arxiv '
                     'where utcdate = date(?)')
            query_params = (arxiv_date,)
            cursor.execute(query, query_params)
            rows = cursor.fetchall()

            if rows and len(rows) > 0:

                local_author_articles = []

                for row in rows:

                    paper_authors = row[1]
                    paper_authors = (paper_authors.split(': ')[-1]).split(',')

                    # normalize these names so we can compare them more
                    # robustly to the local authors
                    paper_authors = [x.lower().strip() for x in paper_authors]
                    paper_authors = [x.split('(')[0] for x in paper_authors]
                    paper_authors = [x.strip() for x in paper_authors
                                     if len(x) > 1]
                    paper_authors = [x.replace('.', ' ')
                                     for x in paper_authors]
                    paper_authors = [squeeze(x) for x in paper_authors]

                    paper_author_fnames = [x.split() for x in paper_authors]
                    paper_author_fnames = [''.join([x[0][0], x[-1]])
                                           for x in paper_author_fnames]

                    paper_authors = [x.replace(' ', '')
                                     for x in paper_authors]

                    # match to the flastname first, then if that works, try
                    # another match with fullname. if both work, then we
                    # accept this as a local author match
                    for paper_author, paper_fname in zip(
                            paper_authors, paper_author_fnames):

                        matched_author_fname = difflib.get_close_matches(
                            paper_fname,
                            local_author_fnames,
                            n=1,
                            cutoff=match_threshold
                        )

                        if matched_author_fname:

                            # this is a bit lower to allow looser matches
                            # between f. m. lastname in the paper authors
                            # list and first lastname pairs in the local
                            # authors list
                            matched_author_full = difflib.get_close_matches(
                                paper_author,
                                local_authors,
                                n=1,
                                cutoff=0.7
                            )

                            if matched_author_full:

                                local_author_articles.append(row[0])
                                print('%s: %s, matched paper author: %s '
                                      'to local author: %s' %
                                      (row[0], paper_authors,
                                       paper_author, matched_author_full))

                                if update_db:
                                    cursor.execute(
                                        'update arxiv '
                                        'set local_authors = ? where '
                                        'arxiv_id = ?',
                                        (True, row[0],)
                                    )

                                break

                if update_db:
                    database.commit()

                return local_author_articles

            else:
                print('no articles for this date')
                return False

        else:
            print('no local authors defined')
            return False

    finally:
        # close the cursor and DB connection if we opened them in this
        # function
        if closedb:
            cursor.close()
            database.close()
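The normalization in the loop above reduces every author name to a canonical lowercase form and a first-initial+lastname key before fuzzy matching. Pulled out as helpers (the helper names are mine, not the module's):

from tornado.escape import squeeze

def normalize_author(name):
    # lowercase, dots to spaces, whitespace runs collapsed
    return squeeze(name.lower().replace('.', ' ')).strip()

def flastname(name):
    # first-initial + lastname key used for the first-pass match
    parts = normalize_author(name).split()
    return parts[0][0] + parts[-1]

assert normalize_author('J.  D.   Smith') == 'j d smith'
assert flastname('J. D. Smith') == 'jsmith'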
def create_new_user(
    payload: dict,
    min_pass_length: int = 12,
    max_unsafe_similarity: int = 33,
    override_authdb_path: str = None,
    raiseonfail: bool = False,
    config: SimpleNamespace = None,
) -> dict:
    """Makes a new user.

    Parameters
    ----------

    payload : dict
        This is a dict with the following required keys:

        - full_name: str. Full name for the user

        - email: str. User's email address

        - password: str. User's password.

        Optional payload items include:

        - extra_info: dict. optional dict to add any extra info for this user,
          will be stored as JSON in the DB

        - verify_retry_wait: int, default: 6. This sets the amount of time in
          hours a user must wait before retrying a failed verification action,
          i.e., responding before expiry of and with the correct verification
          token.

        - system_id: str. If this is provided, must be a unique string that
          will serve as the system_id for the user. This ID is safe to share
          with client JS, etc., as opposed to the user_id primary key for the
          user. If not provided, a UUIDv4 will be generated and used for the
          system_id.

        - public_suffix_list: list of str. If this is provided as a payload
          item, it must be a list of domain name suffixes sourced from the
          Mozilla Public Suffix list: https://publicsuffix.org/list/. This is
          used to check if the full name of the user may possibly be a spam
          link intended to be used when the authnzerver emails out
          verification tokens for new users. If the full name contains a
          suffix in this list, the user creation request will fail. If this
          item is not provided in the payload, this function will look up the
          current process's namespace to see if it was loaded there and use it
          from there if so. If the public suffix list can't be found in either
          place, new user creation will fail.

        In addition to these items received from an authnzerver client, the
        payload must also include the following keys (usually added in by a
        wrapping function):

        - reqid: int or str

        - pii_salt: str

    override_authdb_path : str or None
        If given as a str, is the alternative path to the auth DB.

    raiseonfail : bool
        If True, will raise an Exception if something goes wrong.

    min_pass_length : int
        The minimum required character length of the password.

    max_unsafe_similarity : int
        The maximum allowed similarity ratio between the input password and
        the server's domain name, the user's email, or their name.

    config : SimpleNamespace object or None
        An object containing systemwide config variables as attributes. This
        is useful when the wrapping function needs to pass in some settings
        directly from environment variables.

    Returns
    -------

    dict
        Returns a dict with the user's user_id and user_email, and a boolean
        for send_verification.

    Notes
    -----

    If the email address already exists in the database, then either the user
    has forgotten that they have an account or someone else is being
    annoying. In this case, if is_active is True, we'll tell the user that
    we've sent an email but won't do anything. If is_active is False and
    emailverify_sent_datetime is at least *payload['verify_retry_wait']* hours
    in the past, we'll send a new email verification email and update the
    emailverify_sent_datetime. In this case, we'll just tell the user that
    we've sent the email but won't tell them if their account exists.

    Only after the user verifies their email, is_active will be set to True
    and user_role will be set to 'authenticated'.

    """

    engine, meta, permjson, dbpath = get_procdb_permjson(
        override_authdb_path=override_authdb_path,
        override_permissions_json=None,
        raiseonfail=raiseonfail,
    )

    for key in ("reqid", "pii_salt"):
        if key not in payload:
            LOGGER.error(
                "Missing %s in payload dict. Can't process this request."
                % key
            )
            return {
                "success": False,
                "user_email": None,
                "user_id": None,
                "send_verification": False,
                "failure_reason": (
                    "invalid request: missing '%s' in request" % key
                ),
                "messages": ["Invalid user creation request."],
            }

    for key in ("full_name", "email", "password"):
        if key not in payload:
            LOGGER.error(
                "[%s] Invalid user creation request, missing %s."
                % (payload["reqid"], key)
            )
            return {
                "success": False,
                "user_email": None,
                "user_id": None,
                "send_verification": False,
                "failure_reason": (
                    "invalid request: missing '%s' in request" % key
                ),
                "messages": ["Invalid user creation request."],
            }

    #
    # validate the email provided
    #

    # check for Unicode confusables and dangerous usernames
    email_confusables_ok = validators.validate_confusables_email(
        payload["email"]
    )

    # check if the email is a valid one according to HTML5 specs
    email_regex_ok = validators.validate_email_address(payload["email"])

    # check if the email domain is not a disposable email address
    if email_confusables_ok and email_regex_ok:
        email_domain = payload["email"].split("@")[1].casefold()
        email_domain_not_disposable = (
            email_domain not in validators.DISPOSABLE_EMAIL_DOMAINS
        )
    else:
        email_domain_not_disposable = False

    # if all of the tests above pass, the email is OK
    email_ok = (
        email_regex_ok
        and email_confusables_ok
        and email_domain_not_disposable
    )

    if not email_ok:
        LOGGER.error(
            "[%s] User creation request failed for email: %s. "
            "The email address provided is not valid."
            % (
                payload["reqid"],
                pii_hash(payload["email"], payload["pii_salt"]),
            )
        )
        return {
            "success": False,
            "user_email": None,
            "user_id": None,
            "send_verification": False,
            "failure_reason": "invalid email",
            "messages": [
                "The email address provided doesn't "
                "seem to be a valid email address and cannot be used "
                "to sign up for an account on this server."
            ],
        }

    email = validators.normalize_value(payload["email"])
    full_name = validators.normalize_value(payload["full_name"],
                                           casefold=False)

    # sanitize the full name
    full_name = squeeze(xhtml_escape(full_name))
    if "http" in full_name.casefold() or "://" in full_name:
        LOGGER.error(
            f"[{payload['reqid']}] Full name provided contains "
            f"a link or is close to one: {full_name} "
            f"and is likely suspicious."
        )
        return {
            "success": False,
            "user_email": None,
            "user_id": None,
            "send_verification": False,
            "failure_reason": "invalid full name",
            "messages": [
                "The full name provided appears to contain "
                "an HTTP link, and cannot be used "
                "to sign up for an account on this server."
            ],
        }

    # check if the full name contains a valid public suffix domain
    # it's probably suspicious if so
    currproc = mp.current_process()
    public_suffix_list = getattr(currproc, "public_suffix_list", None)
    if not public_suffix_list:
        public_suffix_list = payload.get("public_suffix_list", None)

    if not public_suffix_list:
        LOGGER.error(
            f"[{payload['reqid']}] Could not validate full name "
            f"because the public suffix list is not provided in "
            f"either the payload or in the current process namespace."
        )
        return {
            "success": False,
            "user_email": None,
            "user_id": None,
            "send_verification": False,
            "failure_reason": "public suffix list not present",
            "messages": [
                "Full name could not be validated "
                "because of an internal server error"
            ],
        }

    for domain_suffix in public_suffix_list:
        if domain_suffix in full_name.casefold():
            LOGGER.error(
                f"[{payload['reqid']}] Full name provided contains "
                f"a link or is close to one: {full_name} "
                f"and is likely suspicious."
            )
            return {
                "success": False,
                "user_email": None,
                "user_id": None,
                "send_verification": False,
                "failure_reason": "invalid full name",
                "messages": [
                    "The full name provided appears to contain "
                    "an HTTP link, and cannot be used "
                    "to sign up for an account on this server."
                ],
            }

    # get the password
    password = payload["password"]

    #
    # optional items
    #

    # 1. get extra info if any
    extra_info = payload.get("extra_info", None)

    # 2. get the verify_retry_wait time
    verify_retry_wait = payload.get("verify_retry_wait", 6)
    try:
        verify_retry_wait = int(verify_retry_wait)
    except Exception:
        verify_retry_wait = 6

    if verify_retry_wait < 1:
        verify_retry_wait = 1

    # 3. generate or get a system_id for this user
    if "system_id" in payload and isinstance(payload["system_id"], str):
        system_id = payload["system_id"]
    else:
        system_id = str(uuid.uuid4())

    #
    # proceed to processing
    #

    users = meta.tables["users"]

    # the password is restricted to 256 characters since that should be
    # enough (for 2020), and we don't want to kill our own server when
    # hashing absurdly long passwords through Argon2-id.
    input_password = password[:256]

    # hash the user's password
    hashed_password = pass_hasher.hash(input_password)

    # validate the input password to see if it's OK
    # do this here to make sure the password hash completes at least once
    passok, messages = validate_input_password(
        full_name,
        email,
        input_password,
        payload["pii_salt"],
        payload["reqid"],
        min_pass_length=min_pass_length,
        max_unsafe_similarity=max_unsafe_similarity,
        config=config,
    )

    if not passok:
        LOGGER.error(
            "[%s] User creation request failed for email: %s. "
            "The password provided is not secure."
            % (
                payload["reqid"],
                pii_hash(payload["email"], payload["pii_salt"]),
            )
        )
        return {
            "success": False,
            "user_email": email,
            "user_id": None,
            "send_verification": False,
            "failure_reason": "invalid password",
            "messages": messages,
        }

    # insert stuff into the user's table, set is_active = False, user_role =
    # 'locked', the emailverify_sent_datetime to datetime.utcnow()
    new_user_dict = None

    try:

        if not extra_info:
            extra_info = {
                "provenance": "request-created",
                "type": "normal-user",
                "verify_retry_wait": verify_retry_wait,
            }
        else:
            extra_info.update(
                {
                    "provenance": "request-created",
                    "type": "normal-user",
                    "verify_retry_wait": verify_retry_wait,
                }
            )

        new_user_dict = {
            "full_name": full_name,
            "system_id": system_id,
            "password": hashed_password,
            "email": email,
            "email_verified": False,
            "is_active": False,
            "emailverify_sent_datetime": datetime.utcnow(),
            "created_on": datetime.utcnow(),
            "user_role": "locked",
            "last_updated": datetime.utcnow(),
            "extra_info": extra_info,
        }
        with engine.begin() as conn:
            ins = insert(users).values(new_user_dict)
            conn.execute(ins)

        user_added = True

    # this will catch stuff like people trying to sign up again with their
    # email address
    except Exception:
        user_added = False

    with engine.begin() as conn:

        # get back the user ID
        sel = (
            select(
                users.c.email,
                users.c.user_id,
                users.c.system_id,
                users.c.is_active,
                users.c.emailverify_sent_datetime,
            )
            .select_from(users)
            .where(users.c.email == email)
        )
        result = conn.execute(sel)
        rows = result.first()

    # if the user was added successfully, tell the frontend all is good and
    # to send a verification email
    if user_added and rows:

        LOGGER.info(
            "[%s] User creation request succeeded for email: %s. "
            "New user_id: %s"
            % (
                payload["reqid"],
                pii_hash(payload["email"], payload["pii_salt"]),
                pii_hash(rows.user_id, payload["pii_salt"]),
            )
        )
        messages.append(
            "User account created. Please verify your email address to "
            "log in."
        )
        return {
            "success": True,
            "user_email": rows.email,
            "user_id": rows.user_id,
            "system_id": rows.system_id,
            "send_verification": True,
            "messages": messages,
        }

    # if the user wasn't added successfully, then they exist in the DB
    # already
    elif (not user_added) and rows:

        LOGGER.error(
            "[%s] User creation request failed for email: %s. "
            "The email provided probably exists in the DB already."
            % (
                payload["reqid"],
                pii_hash(payload["email"], payload["pii_salt"]),
            )
        )

        # check the timedelta between now and the emailverify_sent_datetime
        verification_timedelta = (
            datetime.utcnow() - rows.emailverify_sent_datetime
        )

        # this sets whether we should resend the verification email
        resend_verification = (not rows.is_active) and (
            verification_timedelta > timedelta(hours=verify_retry_wait)
        )
        LOGGER.warning(
            "[%s] Existing user_id = %s for new user creation "
            "request with email = %s, is_active = %s. "
            "Email verification originally sent at = %sZ, "
            "verification timedelta: %s, verify_retry_wait = %s hours. "
            "Will resend verification = %s"
            % (
                payload["reqid"],
                pii_hash(rows.user_id, payload["pii_salt"]),
                pii_hash(payload["email"], payload["pii_salt"]),
                rows.is_active,
                rows.emailverify_sent_datetime.isoformat(),
                verification_timedelta,
                verify_retry_wait,
                resend_verification,
            )
        )

        if resend_verification:

            # if we're going to resend the verification, update the users
            # table with the latest info sent by the user (they might've
            # changed their password in the meantime)
            if new_user_dict is not None:

                del new_user_dict["created_on"]
                del new_user_dict["system_id"]

                with engine.begin() as conn:
                    upd = (
                        users.update()
                        .where(users.c.user_id == rows.user_id)
                        .values(new_user_dict)
                    )
                    conn.execute(upd)

                    # get back the user ID
                    sel = (
                        select(
                            users.c.email,
                            users.c.user_id,
                            users.c.system_id,
                            users.c.is_active,
                            users.c.emailverify_sent_datetime,
                        )
                        .select_from(users)
                        .where(users.c.email == email)
                    )
                    result = conn.execute(sel)
                    rows = result.first()

            LOGGER.warning(
                "[%s] Resending verification to user: %s because timedelta "
                "between original sign up and retry: %s > "
                "verify_retry_wait: %s hours. "
                "User information has been updated "
                "with their latest provided sign-up info."
                % (
                    payload["reqid"],
                    pii_hash(rows.user_id, payload["pii_salt"]),
                    verification_timedelta,
                    verify_retry_wait,
                )
            )

        messages.append(
            "User account created. Please verify your email address to "
            "log in."
        )

        return {
            "success": False,
            "user_email": rows.email,
            "user_id": rows.user_id,
            "system_id": rows.system_id,
            "send_verification": resend_verification,
            "failure_reason": "user exists",
            "messages": messages,
        }

    # otherwise, the user wasn't added successfully and they don't already
    # exist in the database so something else went wrong.
    else:

        LOGGER.error(
            "[%s] User creation request failed for email: %s. "
            "Could not add row to the DB."
            % (
                payload["reqid"],
                pii_hash(payload["email"], payload["pii_salt"]),
            )
        )
        messages.append(
            "Your user account could not be created. Please try again later."
        )
        return {
            "success": False,
            "user_email": None,
            "user_id": None,
            "send_verification": False,
            "failure_reason": "DB issue with user creation",
            "messages": messages,
        }
# -*- coding: utf-8 -*-
#
# Copyright(c) 2015 http://feilong.me
#
# @author: Felinx Lee <*****@*****.**>
#

from tornado.httpclient import HTTPClient, HTTPRequest
from tornado import escape

import hashlib

cookie = "csrftoken=NVddnpsZKq1XJHzdVb5vzv4nMULCUI6r"  # Your current cookie
cookie = escape.squeeze(cookie)
agent = "Instagram 6.14.0 (iPhone7,1; iPhone OS 8_3; zh_CN;zh-Hans) AppleWebKit/420+"
url = "https://i.instagram.com/api/v1/feed/user/1419637354"  # USER_ID should be replaced by real ID like 12345678


def main():
    client = HTTPClient()
    request = HTTPRequest(url, headers={"Cookie": cookie, "User-Agent": agent})
    response = client.fetch(request)
    resp = escape.json_decode(response.body)
    items = resp.get("items", [])
    for item in items:
        imgurl = item["image_versions2"]["candidates"][0]["url"]
        # md5 needs bytes, so encode the URL first
        urlmd5 = hashlib.md5(imgurl.encode("utf-8")).hexdigest()
        # or run this command in Python or request img file again
        print("wget '%s' -O %s.jpg" % (imgurl, urlmd5))


if __name__ == '__main__':
    main()
def _write_checkplot_picklefile(checkplotdict,
                                outfile=None,
                                protocol=None,
                                outgzip=False):
    '''This writes the checkplotdict to a (gzipped) pickle file.

    Parameters
    ----------

    checkplotdict : dict
        This is the checkplotdict to write to the pickle file.

    outfile : None or str
        The path to the output pickle file to write. If `outfile` is None,
        writes a (gzipped) pickle file of the form:

        checkplot-{objectid}.pkl(.gz)

        to the current directory.

    protocol : int
        This sets the pickle file protocol to use when writing the pickle:

        If None, will choose a protocol using the following rules:

        - 4 -> default in Python >= 3.4 - fast but incompatible with Python 2
        - 3 -> default in Python 3.0-3.3 - mildly fast
        - 2 -> default in Python 2 - very slow, but compatible with Python 2/3

        The default protocol kwarg is None; this will make an automatic choice
        for the pickle protocol that's best suited for the version of Python
        in use. Note that this will make pickles generated by Py3 incompatible
        with Py2.

    outgzip : bool
        If this is True, will gzip the output file. Note that if the `outfile`
        str ends in '.gz', this will be automatically turned on.

    Returns
    -------

    str
        The absolute path to the written checkplot pickle file. None if
        writing fails.

    '''

    # for Python >= 3.4; use v4 by default
    if not protocol:
        protocol = 4

    if outgzip:

        if not outfile:
            outfile = (
                'checkplot-{objectid}.pkl.gz'.format(
                    objectid=squeeze(
                        checkplotdict['objectid']
                    ).replace(' ', '-')
                )
            )

        with gzip.open(outfile, 'wb') as outfd:
            pickle.dump(checkplotdict, outfd, protocol=protocol)

    else:

        if not outfile:
            outfile = (
                'checkplot-{objectid}.pkl'.format(
                    objectid=squeeze(
                        checkplotdict['objectid']
                    ).replace(' ', '-')
                )
            )

        # make sure to do the right thing if '.gz' is in the filename but
        # outgzip was False
        if outfile.endswith('.gz'):

            LOGWARNING('output filename ends with .gz but kwarg '
                       'outgzip=False. will use gzip to compress the '
                       'output pickle')
            with gzip.open(outfile, 'wb') as outfd:
                pickle.dump(checkplotdict, outfd, protocol=protocol)

        else:
            with open(outfile, 'wb') as outfd:
                pickle.dump(checkplotdict, outfd, protocol=protocol)

    return os.path.abspath(outfile)
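The filename construction above is the useful bit to reuse: `squeeze` plus a space-to-dash replace turns any object ID, however messily spaced, into a safe filename slug. In isolation:

from tornado.escape import squeeze

def objectid_slug(objectid):
    # collapse whitespace runs, then swap spaces for dashes
    return squeeze(objectid).replace(' ', '-')

assert objectid_slug('HAT  579-0001234') == 'HAT-579-0001234'
assert objectid_slug(' OGLE LMC CEP 0001 ') == 'OGLE-LMC-CEP-0001'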
def _FormatConversationEmail(cls, client, recipient_id, viewpoint, activity):
    """Constructs an email which alerts the recipient that they have access
    to a new conversation, either due to a share_new operation, or to an
    add_followers operation. The email includes a clickable link to the
    conversation on the web site.
    """
    from viewfinder.backend.db.identity import Identity
    from viewfinder.backend.db.photo import Photo
    from viewfinder.backend.db.user import User

    # Get email address of recipient.
    recipient_user = yield gen.Task(User.Query, client, recipient_id, None)
    if recipient_user.email is None:
        # No email address associated with user, so can't send email.
        raise gen.Return(None)

    identity_key = 'Email:%s' % recipient_user.email

    # Create ShortURL that sets prospective user cookie and then redirects
    # to the conversation.
    viewpoint_url = yield AlertManager._CreateViewpointURL(
        client, recipient_user, identity_key, viewpoint)

    sharer = yield gen.Task(User.Query, client, activity.user_id, None)
    sharer_name = AlertManager._GetNameFromUser(sharer,
                                                prefer_given_name=False)

    # Create the cover photo ShortURL by appending a "next" query parameter
    # to the viewpoint ShortURL.
    cover_photo_url = None
    cover_photo_height = None
    cover_photo_width = None
    if viewpoint.cover_photo is not None:
        next_url = '/episodes/%s/photos/%s.f' % (
            viewpoint.cover_photo['episode_id'],
            viewpoint.cover_photo['photo_id'])
        cover_photo_url = "%s?%s" % (viewpoint_url,
                                     urlencode(dict(next=next_url)))

        photo = yield gen.Task(Photo.Query, client,
                               viewpoint.cover_photo['photo_id'], None)

        if photo.aspect_ratio < 1:
            cover_photo_height = AlertManager._MAX_COVER_PHOTO_DIM
            cover_photo_width = int(AlertManager._MAX_COVER_PHOTO_DIM *
                                    photo.aspect_ratio)
        else:
            cover_photo_width = AlertManager._MAX_COVER_PHOTO_DIM
            cover_photo_height = int(AlertManager._MAX_COVER_PHOTO_DIM /
                                     photo.aspect_ratio)

    email_args = {'from': EmailManager.Instance().GetInfoAddress(),
                  'to': recipient_user.email,
                  'subject': '%s added you to a conversation' % sharer_name}
    util.SetIfNotEmpty(email_args, 'toname', recipient_user.name)
    if sharer_name:
        email_args['fromname'] = '%s via Viewfinder' % sharer_name

    # Create the unsubscribe URL.
    unsubscribe_cookie = User.CreateUnsubscribeCookie(
        recipient_id, AccountSettings.EMAIL_ALERTS)
    unsubscribe_url = 'https://%s/unsubscribe?%s' % (
        options.options.domain, urlencode(dict(cookie=unsubscribe_cookie)))

    # Set viewpoint title.
    viewpoint_title = viewpoint.title if viewpoint is not None else None

    fmt_args = {'cover_photo_url': cover_photo_url,
                'cover_photo_height': cover_photo_height,
                'cover_photo_width': cover_photo_width,
                'viewpoint_url': viewpoint_url,
                'unsubscribe_url': unsubscribe_url,
                'sharer_name': sharer_name,
                'viewpoint_title': viewpoint_title,
                'toname': recipient_user.name}

    resources_mgr = ResourcesManager.Instance()
    email_args['html'] = escape.squeeze(
        resources_mgr.GenerateTemplate('alert_conv_base.email',
                                       is_html=True, **fmt_args))
    email_args['text'] = resources_mgr.GenerateTemplate(
        'alert_conv_base.email', is_html=False, **fmt_args)

    raise gen.Return(email_args)
def validate_input_password(full_name,
                            email,
                            password,
                            min_length=12,
                            max_match_threshold=20):
    '''This validates user input passwords.

    1. must be at least min_length characters (we'll truncate the password at
       1024 characters since we don't want to store entire novels)
    2. must not match within max_match_threshold of their email or full_name
    3. must not match within max_match_threshold of the site's FQDN
    4. must not have a single case-folded character take up more than 20% of
       the length of the password
    5. must not be completely numeric
    6. must not be in the top 10k passwords list

    '''

    messages = []

    # we'll ignore any repeated white space and fail immediately if the
    # password is all white space
    if len(squeeze(password.strip())) < min_length:

        LOGGER.warning('password for new account: %s is too short' % email)
        messages.append('Your password is too short. '
                        'It must have at least %s characters.' % min_length)
        passlen_ok = False

    else:
        passlen_ok = True

    # check if the password is straight-up dumb
    if password.casefold() in validators.TOP_10K_PASSWORDS:

        LOGGER.warning('password for new account: %s is '
                       'in top 10k passwords list' % email)
        messages.append('Your password is on the list of the '
                        'most common passwords and is vulnerable to '
                        'guessing.')
        tenk_ok = False

    else:
        tenk_ok = True

    # FIXME: also add fuzzy matching to top 10k passwords list to avoid stuff
    # like 'passwordpasswordpassword'

    # check the fuzzy match against the FQDN, email address, and full name
    fqdn = socket.getfqdn()
    fqdn_match = UQRatio(password.casefold(), fqdn.casefold())
    email_match = UQRatio(password.casefold(), email.casefold())
    name_match = UQRatio(password.casefold(), full_name.casefold())

    fqdn_ok = fqdn_match < max_match_threshold
    email_ok = email_match < max_match_threshold
    name_ok = name_match < max_match_threshold

    if not fqdn_ok or not email_ok or not name_ok:

        LOGGER.warning('password for new account: %s matches FQDN '
                       '(similarity: %s), their email address '
                       '(similarity: %s), or their full name '
                       '(similarity: %s)' %
                       (email, fqdn_match, email_match, name_match))
        messages.append('Your password is too similar to either '
                        'the domain name of this server or your '
                        'own name or email address.')

    # next, check if the password is complex enough
    histogram = {}
    for char in password:
        if char.lower() not in histogram:
            histogram[char.lower()] = 1
        else:
            histogram[char.lower()] = histogram[char.lower()] + 1

    hist_ok = True

    for h in histogram:
        if (histogram[h] / len(password)) > 0.2:
            hist_ok = False
            LOGGER.warning('one character is more than '
                           '0.2 x length of the password')
            messages.append(
                'Your password is not complex enough. '
                'One or more characters appear too frequently.')
            break

    # check if the password is all numeric
    if password.isdigit():
        numeric_ok = False
        messages.append('Your password cannot be all numbers.')
    else:
        numeric_ok = True

    return ((passlen_ok and email_ok and name_ok and fqdn_ok and
             hist_ok and numeric_ok and tenk_ok),
            messages)
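A quick exercise of the validator (assuming the module's `socket`, `UQRatio`, and `validators` imports are in place; inputs are placeholders):

ok, msgs = validate_input_password(
    full_name='Example Person',
    email='person@example.org',
    password='horse staple battery correct 42',
)

if not ok:
    for m in msgs:
        print(m)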