def add_contact(email, first_name, last_name, ip_addr, country_code):
    """Insert or update the mailing-list row for ``email``.

    The address is checked against a simple pattern and lowercased, then
    looked up in ``db_models.EmailList``. An existing row has its name, IP
    and country fields overwritten; otherwise a new row is created with
    ``unsubscribed`` set to ``False``.

    Returns:
        True when a new row was inserted, False when an existing row was
        updated.

    Raises:
        Exception: with message ``'Invalid email'`` when the address does
            not match the pattern. Any error raised while talking to the
            database is logged and re-raised unchanged.
    """
    if not re.match(r"(^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$)", email):
        raise Exception('Invalid email')

    # Emails are stored normalized to lowercase in the DB.
    email = email.lower()

    try:
        # Attempt to load any existing entry matching the email.
        row = db_models.EmailList.query.filter_by(email=email).first()
        new_contact = not row
        if new_contact:
            row = db_models.EmailList()
            row.email = email
            row.unsubscribed = False

        # These fields are written for both new and existing entries.
        row.first_name = first_name
        row.last_name = last_name
        row.ip_addr = ip_addr
        row.country_code = country_code

        if new_contact:
            db.session.add(row)
        db.session.commit()
    except Exception as e:
        log('ERROR add_contact:', type(e), e)
        # A failed flush/commit leaves the session unusable until it is
        # rolled back, so reset it before handing the error to the caller.
        db.session.rollback()
        # Bare raise keeps the original traceback intact.
        raise

    return new_contact
def register():
    """Handle the registration form and redirect to the index page.

    On POST the submitted ``register_username`` is wrapped in a ``User``.
    A new, valid user is added and a background process is started to send
    the login code; otherwise a message explaining why registration was
    refused is flashed. Non-POST requests simply redirect.

    Returns:
        A redirect response to the ``index`` endpoint.
    """
    if request.method == 'POST':
        username = request.form['register_username']
        u = User(username)
        validUsername = u.validUser()
        if u.newUser and validUsername:
            u.add()
            log("Adding new user {}".format(username))
            url_root = request.url_root
            # Sending the code drives a browser, so run it out of process
            # instead of blocking the request.
            t = multiprocessing.Process(target=u.sendCode, args=(url_root,))
            t.start()
            msg = 'We have successfully sent you a code. Please check your LYN inbox!'
            category = "primary"
        elif not validUsername:
            msg = '{} is NOT a valid lowyat.net user.'.format(username)
            category = "danger"
        else:
            msg = '{} is already a registered user.'.format(username)
            category = "warning"
        # Flash only inside the POST branch: `category` is not defined for
        # other methods, which previously raised UnboundLocalError.
        flash(msg, category)
    return redirect(url_for("index"))
def partners_interest(name, company_name, email, website, note, ip_addr):
    """Record a partnership enquiry and notify the team.

    Validates ``email`` (and ``website`` when one is given), stores a
    ``db_models.Interest`` row, sends the ``build_on_origin`` email type to
    the enquirer and forwards the submitted details to the admins.

    Returns:
        A translated, user-facing status message: a validation error, a
        generic failure message when the database write raised, or a
        thank-you message on success.
    """
    email_pattern = r"(^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$)"
    website_pattern = r"(^(?:http(s)?:\/\/)?[\w.-]+(?:\.[\w\.-]+)+[\w\-\._~:/?#[\]@!\$&'\(\)\*\+,;=.]+$)"

    if re.match(email_pattern, email) is None:
        return gettext('Please enter a valid email address')
    # The website is optional; it is only validated when supplied.
    if website and re.match(website_pattern, website) is None:
        return gettext('Please enter a valid website address')

    try:
        interest = db_models.Interest()
        interest.name = name
        interest.company_name = company_name
        interest.email = email
        interest.website = website
        interest.note = note
        interest.ip_addr = ip_addr
        db.session.add(interest)
        db.session.commit()
    except Exception as e:
        log('ERROR partners_interest:', type(e), e)
        return gettext('Ooops! Something went wrong.')

    details = (name, company_name, email, website, note)
    message = "Name: %s<br>Company Name: %s<br>Email: %s<br>Website: %s<br>Note: %s" % details
    subject = "{name} ({company_name}) is interested in building on Origin".format(
        name=name, company_name=company_name)

    email_types.send_email_type('build_on_origin', DEFAULT_SENDER, email)
    sgw.notify_admins(message, subject=subject)

    return gettext("Thanks! We'll be in touch soon.")
def from_files(body_f, stance_f, max_seq_len, vectorizer, pkl_to, bal_stances):
    """Load headlines, bodies and stances from the two CSV files.

    Rows whose stance equals ``UNRELATED_KEY`` are dropped, the remaining
    rows are optionally balanced, stance labels are mapped through
    ``stance2idx``, and, when a vectorizer is supplied, headlines and
    bodies are passed through ``vectorize_texts``. The result is optionally
    written out with ``to_pkl``.

    Returns:
        A ``(headlines, bodies, stances)`` tuple.
    """
    # 'Body ID' is unique in the bodies file, so it can serve as the index.
    body_table = pd.read_csv(body_f, index_col='Body ID')
    # In the stances file several rows may point at the same body.
    stance_table = pd.read_csv(stance_f)

    # Unrelated claims are not used.
    keep_mask = stance_table['Stance'] != UNRELATED_KEY
    stance_table = stance_table[keep_mask]

    if bal_stances:
        stance_table = balance_stances(stance_table)

    headlines = stance_table['Headline']
    # Resolve each stance row to the article text it refers to.
    bodies = [
        body_table.loc[body_id, 'articleBody']
        for body_id in stance_table['Body ID']
    ]
    # Convert stances from text to numerical labels.
    stances = [stance2idx[label] for label in stance_table['Stance']]

    if vectorizer is not None:
        assert (max_seq_len is not None)
        log("Vectorizing headlines")
        headlines = vectorize_texts(vectorizer, headlines, max_seq_len)
        log("Vectorizing bodies")
        bodies = vectorize_texts(vectorizer, bodies, max_seq_len)

    if pkl_to:
        to_pkl(headlines=headlines, bodies=bodies, stances=stances, path=pkl_to)

    return headlines, bodies, stances
def train(self, titles, bodies, labels, epochs, seq_len, num_classes=3, batch_size=32, val_split=0.2, verbose=1, logs_name=None):
    """Pad the inputs, one-hot encode the labels and fit ``self.model``.

    Titles and bodies are padded/truncated to ``seq_len`` with
    ``pad_sequences``; labels go through ``to_categorical``; class weights
    come from ``get_class_weights``. A TensorBoard callback is attached
    only when ``logs_name`` is given.

    Returns:
        Whatever ``self.model.fit`` returns.
    """
    padded_titles = pad_sequences(titles, maxlen=seq_len, dtype='float32')
    padded_bodies = pad_sequences(bodies, maxlen=seq_len, dtype='float32')
    one_hot_labels = to_categorical(labels, num_classes=num_classes)

    class_weights = get_class_weights(one_hot_labels)
    log("Calculated class weights")
    log(class_weights)

    if logs_name is None:
        callbacks = []
    else:
        tb_dir = get_tb_logdir(f"CiscoCNN_{logs_name}")
        callbacks = [TensorBoard(log_dir=tb_dir)]

    fit_options = dict(
        batch_size=batch_size,
        epochs=epochs,
        verbose=verbose,
        validation_split=val_split,
        class_weight=class_weights,
        callbacks=callbacks,
    )
    return self.model.fit([padded_titles, padded_bodies], one_hot_labels, **fit_options)
def add_sendgrid_contact(email, full_name=None, country_code=None, dapp_user=None):
    """Post a single recipient to the SendGrid contact database.

    When ``full_name`` is given it is split with ``HumanName`` to fill the
    first/last name fields; otherwise both are sent as ``None``.

    Returns:
        True when the request completed without raising, False when any
        exception was raised (the exception is logged, not propagated).
    """
    try:
        client = sendgrid.SendGridAPIClient(apikey=constants.SENDGRID_API_KEY)

        if full_name:
            parsed = HumanName(full_name)
            first_name, last_name = parsed.first, parsed.last
        else:
            first_name, last_name = None, None

        recipient = {
            "email": email,
            "first_name": first_name,
            "last_name": last_name,
            "country_code": country_code,
            "dapp_user": dapp_user,
        }
        client.client.contactdb.recipients.post(request_body=[recipient])
    except Exception as e:
        log('ERROR add_sendgrid_contact:', type(e), e)
        return False
    return True
def linkLogin(username=None, token=None):
    """Log a first-time user in from the link they were sent.

    The token is decrypted with ``CryptoFernet`` to recover the password,
    which is then checked with ``User.validate``. On success the session is
    populated and the password-update page is rendered; in every other case
    the caller is redirected to the index page.

    Returns:
        The rendered ``updatePassword.html`` page on a successful first
        login, otherwise a redirect to ``index``.
    """
    page = redirect(url_for("index"))
    if not (username and token):
        return page

    u = User(username)
    if not u.firstTime():
        return page

    password = CryptoFernet().decrypt(token)
    # The decrypted password is deliberately not logged: it is a live
    # credential and must not end up in log files.
    user_id = u.validate(password)
    session['logged_in'] = user_id
    log("user_id is {}".format(user_id))

    if user_id:
        session["username"] = username
        session["user_id"] = user_id
        flash("Please update your password.", "warning")
        page = render_template("updatePassword.html", oldpassword=password)
    return page
def time2date(timestampp):
    """Format a Unix timestamp as ``YYYY-MM-DD HH:MM:SS`` in local time.

    Args:
        timestampp: Anything ``int()`` accepts (number or numeric string).

    Returns:
        The formatted string, or ``None`` when the value cannot be
        converted (the failure is logged).
    """
    try:
        moment = datetime.datetime.fromtimestamp(int(timestampp))
    except (TypeError, ValueError, OverflowError, OSError):
        # int() rejects non-numeric input; fromtimestamp() rejects values
        # outside the platform's supported range.
        log("Unable to convert {}".format(timestampp))
        # Return explicitly: previously `result` was unbound on this path
        # and the function died with UnboundLocalError.
        return None
    return moment.strftime('%Y-%m-%d %H:%M:%S')
def to_pkl(headlines, bodies, stances, path):
    """Pickle headlines, bodies and stances as one DataFrame at ``path``.

    Columns are named by ``PICKLE_HEADLINE``, ``PICKLE_BODY`` and
    ``PICKLE_STANCE``. Saving is best-effort: any exception is logged and
    swallowed, and nothing is returned.
    """
    try:
        columns = {
            PICKLE_HEADLINE: headlines,
            PICKLE_BODY: bodies,
            PICKLE_STANCE: stances,
        }
        frame = pd.DataFrame(data=columns)
        frame.to_pickle(path)
    except Exception as e:
        log(f"Saving to pickle failed: {e}")
def mass_unsubscribe_sendgrid_contact(emails):
    """Add ``emails`` to SendGrid suppression group 51716.

    Returns:
        True when the request completed without raising, False otherwise
        (the exception is logged, not propagated).
    """
    try:
        client = sendgrid.SendGridAPIClient(apikey=constants.SENDGRID_API_KEY)
        unsubscribe_group = 51716  # Universal unsubscribe group
        payload = {"recipient_emails": emails}
        group = client.client.asm.groups._(unsubscribe_group)
        group.suppressions.post(request_body=payload)
    except Exception as e:
        log('ERROR mass_unsubscribe_sendgrid_contact:', type(e), e)
        return False
    return True
def sendCode(self, url):
    """Send the login code through a headless Firefox session.

    While ``firefoxRunning()`` is true, waits in 10-second steps for up to
    ten rounds, then opens a browser (stored on ``self.browser``), logs in,
    sends the message for ``url`` and logs out.
    """
    waited = 0
    while True:
        if not firefoxRunning() or waited >= 10:
            break
        time.sleep(10)
        waited += 1
        log("firefox is running... sleep(10) x {}".format(waited))

    # The context manager binds the browser straight onto the instance so
    # login/sendMsg/logout can reach it.
    with Browser('firefox', headless=True) as self.browser:
        self.login()
        self.sendMsg(url)
        self.logout()
def login(self):
    """Log the bot account in to the forum using ``self.browser``.

    Opens the forum front page, fills the ``UserName`` and ``PassWord``
    fields from ``self.bot_username`` / ``self.bot_password``, logs the
    attempt with the password masked, and clicks the ``.button`` element.
    """
    browser = self.browser
    browser.visit("https://forum.lowyat.net/")
    browser.fill('UserName', self.bot_username)
    browser.fill('PassWord', self.bot_password)

    # Only the masked form of the password is written to the log.
    masked = maskPassword(self.bot_password)
    log("Login with username {} and password {}".format(self.bot_username, masked))

    browser.find_by_css('.button').click()
    log("We in!")
def validUser(self):
    """Check that ``self.username`` exists on the forum.

    Fetches the user's profile page and compares the text of the
    ``#profilename`` element with the username, ignoring case.

    Returns:
        True when the profile name matches, False when it differs or the
        element cannot be read.
    """
    username = self.username
    url = "https://forum.lowyat.net/user/{}".format(username)
    log("validating new username {}".format(username))

    session = HTMLSession()
    r = session.get(url)
    body = r.html

    try:
        element = body.find("#profilename", first=True)
        profileName = element.text if element is not None else None
    except Exception:
        # Kept best-effort: an unreadable page counts as "user not found".
        profileName = None

    if profileName is None:
        # Handled explicitly instead of relying on a swallowed NameError
        # from an unbound variable, as the previous version did.
        log("Cannot find #profilename tag")
        log("Username {} is invalid".format(username))
        return False

    if username.lower() == profileName.lower():
        log("Username {} is valid".format(username))
        return True
    return False
def updateStatus(self, post_id, status):
    """Set ``status`` on a post, but only if the requestor owns it.

    The owner's username is looked up first; the update runs only when it
    matches ``self.requestor`` case-insensitively. An unknown ``post_id``
    is logged and ignored.
    """
    log("stop post_id {}".format(post_id))
    owners = self.db.query(
        "SELECT u.username FROM post p, user u WHERE u.user_id=p.user_id AND post_id=:post_id",
        False,
        post_id=post_id)
    try:
        post_owner = owners[0].username
    except IndexError:
        # NOTE(review): assumes indexing an empty result raises IndexError
        # -- confirm against the db wrapper in use.
        log("post_id {} not found".format(post_id))
        return

    if post_owner.lower() == self.requestor.lower():
        self.db.query(
            "UPDATE post SET status=:status WHERE post_id=:post_id",
            False,
            status=status,
            post_id=post_id)
def unsubscribe():
    """Unsubscribe the address given in the ``email`` query parameter.

    A missing or malformed address returns a translated error message
    directly. Otherwise the address is unsubscribed locally and on
    SendGrid, the outcome is flashed, and the visitor is redirected to
    ``/en/``.
    """
    email_pattern = r"(^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$)"
    email = request.args.get("email")

    if not email:
        return gettext('Please enter a valid email address')
    if re.match(email_pattern, email) is None:
        return gettext('Please enter a valid email address')

    try:
        mailing_list.unsubscribe(email)
        mailing_list.unsubscribe_sendgrid_contact(email)
        feedback = gettext('You have been unsubscribed')
    except Exception as err:
        log('Failure: %s' % err)
        feedback = gettext('Ooops, something went wrong')

    flash(feedback)
    return redirect('/en/', code=302)
def getPostInfo(self, post_id):
    """Fetch a topic page and return its title and owner.

    The page at ``self.topic_url + post_id`` is downloaded; the owner is
    read from the first ``span.normalname`` element and the title from the
    ``title`` element.

    Returns:
        A ``(title, topic_owner)`` tuple of strings.
    """
    log("get post info of post_id {}".format(post_id))

    page_url = "{}{}".format(self.topic_url, post_id)
    html = HTMLSession().get(page_url).html

    owner_node = html.find("span.normalname", first=True)
    topic_owner = owner_node.text
    title_node = html.find("title", first=True)
    title = title_node.text

    log("topic owner={};title={}".format(topic_owner, title))
    return title, topic_owner
def generate(self, data, max_df=0.5, min_df=2, pkl_path=None):
    """Build TF-IDF matrices for headlines and bodies plus their similarity.

    A vocabulary is first learned from every headline+body pair joined
    together; headline and body vectorizers are then fitted separately
    against that shared vocabulary.

    Args:
        data: DataFrame with ``DG.UNI_HEADLINES`` and ``DG.UNI_BODIES``
            columns, each cell an iterable of tokens.
        max_df: Upper document-frequency cutoff passed to the vectorizers.
        min_df: Lower document-frequency cutoff passed to the vectorizers.
        pkl_path: Not used by this method.

    Returns:
        ``(cos_similarities, headline_tfidf, body_tfidf)`` where the first
        item holds the similarity of each headline with its own body.
    """
    assert (isinstance(data, pd.DataFrame))

    # Settings shared by all three vectorizers. For max_df/min_df a float
    # is a proportion of documents and an int an absolute count.
    tfidf_options = dict(ngram_range=(1, 3), max_df=max_df, min_df=min_df)

    # The vectorizer expects one string per document, so tokens are joined
    # back together: headline tokens followed by body tokens for each row.
    combined_docs = []
    for _, record in data.iterrows():
        headline_text = ' '.join(record[DG.UNI_HEADLINES])
        body_text = ' '.join(record[DG.UNI_BODIES])
        combined_docs.append(headline_text + ' ' + body_text)

    # Learn the vocabulary from the combined documents.
    combined_vectorizer = TfidfVectorizer(**tfidf_options)
    combined_vectorizer.fit(raw_documents=combined_docs)
    vocab = combined_vectorizer.vocabulary_

    # Headlines, vectorized against the shared vocabulary.
    headline_docs = data[DG.UNI_HEADLINES].map(lambda tokens: ' '.join(tokens))
    headline_vectorizer = TfidfVectorizer(vocabulary=vocab, **tfidf_options)
    headline_tfidf_list = headline_vectorizer.fit_transform(raw_documents=headline_docs)
    log(f"Created TFIDF matrix for headlines with shape {headline_tfidf_list.shape}"
        )

    # Bodies, same procedure.
    body_docs = data[DG.UNI_BODIES].map(lambda tokens: ' '.join(tokens))
    body_vectorizer = TfidfVectorizer(vocabulary=vocab, **tfidf_options)
    body_tfidf_list = body_vectorizer.fit_transform(raw_documents=body_docs)
    log(f"Created TFIDF matrix for bodies with shape {body_tfidf_list.shape}"
        )

    # The full samples-by-samples similarity matrix is computed, and only
    # its diagonal (headline i against body i) is kept.
    pairwise = cosine_similarity(headline_tfidf_list, body_tfidf_list)
    cos_similarities = np.diagonal(pairwise)

    return cos_similarities, headline_tfidf_list, body_tfidf_list
def __init__(self, max_seq_len=None, vectorizer=None, stance_f=None, body_f=None, bal_stances=False, pkl_to=None, pkl_from=None):
    """Load the dataset either from CSV files or from a pickle.

    When both ``stance_f`` and ``body_f`` are given the data is read with
    ``from_files``; otherwise, when ``pkl_from`` is given, with
    ``from_pkl``. The result is stored on ``self.headlines``,
    ``self.bodies`` and ``self.stances``.

    Raises:
        ValueError: when neither source is specified.
    """
    started_at = time.time()

    if stance_f and body_f:
        loaded = from_files(max_seq_len=max_seq_len,
                            body_f=body_f,
                            stance_f=stance_f,
                            pkl_to=pkl_to,
                            vectorizer=vectorizer,
                            bal_stances=bal_stances)
    elif pkl_from:
        loaded = from_pkl(pkl_from, bal_stances=bal_stances)
    else:
        raise ValueError("Incorrect params")

    self.headlines, self.bodies, self.stances = loaded

    log(f"Stance counts: {stance_counts(self.stances)}")
    log(f"FNC Data loaded in {time.time() - started_at}s")
def login(self):
    """Fill and submit the login form for ``self.username``.

    The module-level ``bot_password`` is used when the username is the bot
    account; for any other user the password comes from
    ``self.retrievePassword``. The password is masked before logging.
    """
    if self.username != bot_username:
        password = self.retrievePassword(self.username)
    else:
        password = bot_password

    browser = self.browser
    browser.fill('UserName', self.username)
    browser.fill('PassWord', password)

    log("Login with username {} and password {}".format(
        self.username, maskPassword(password)))

    browser.find_by_css('.button').click()
def presale(full_name, email, desired_allocation, desired_allocation_currency, citizenship, sending_addr, ip_addr):
    """Record a presale enquiry and notify the founders.

    Validates ``email``, stores a ``db_models.Presale`` row, sends the
    ``presale`` email type to the enquirer and forwards the details to the
    founders. In the forwarded message an address starting with ``0x`` is
    linked to etherscan.io; any other address is linked to blockchain.info.

    Returns:
        A translated, user-facing status message.
    """
    email_pattern = r"(^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$)"
    if re.match(email_pattern, email) is None:
        return gettext('Please enter a valid email address')

    try:
        entry = db_models.Presale()
        entry.full_name = full_name
        entry.email = email
        entry.desired_allocation = desired_allocation
        entry.desired_allocation_currency = desired_allocation_currency
        entry.citizenship = citizenship
        entry.sending_addr = sending_addr
        entry.ip_addr = ip_addr
        db.session.add(entry)
        db.session.commit()
    except Exception as e:
        log('ERROR presale:', type(e), e)
        return gettext('Ooops! Something went wrong.')

    if sending_addr:
        # Pick the block explorer by address format.
        if sending_addr.startswith('0x'):
            explorer = "https://etherscan.io/address/"
        else:
            explorer = "https://blockchain.info/address/"
        sending_addr = "<a href='" + explorer + sending_addr + "'>" + sending_addr + "</a>"

    # Whitespace between fields is reproduced exactly as it appears in the
    # source of this block.
    template = (
        "Name: %s<br> "
        "Email: %s<br> "
        "Desired allocation: %s %s<br> "
        "Citizenship: %s<br> "
        "Address: %s<br> "
        "IP: %s"
    )
    message = template % (full_name, email, desired_allocation,
                          desired_allocation_currency, citizenship,
                          sending_addr, ip_addr)

    email_types.send_email_type('presale', DEFAULT_SENDER, email)
    sgw.notify_founders(message, subject=full_name + " is interested in the presale")

    return gettext("Thanks! We'll be in touch soon.")
def eval_predictions(y_true, y_pred, print_results=False):
    """Plot a confusion matrix and compute accuracy and F1 scores.

    Args:
        y_true: True class labels.
        y_pred: Predicted class labels.
        print_results: When true, the metrics are also written to the log.

    Returns:
        A dict with keys ``accuracy``, ``f1_micro``, ``f1_macro`` and
        ``f1_weighted``. Earlier versions returned nothing, so the metrics
        were lost unless ``print_results`` was set.
    """
    plot_confusion_matrix(y_true=y_true,
                          y_pred=y_pred,
                          normalize=True,
                          classes=['agree', 'disagree', 'discuss'])

    # TODO: Precision, recall
    results = {
        'accuracy': accuracy_score(y_true=y_true, y_pred=y_pred),
        'f1_micro': f1_score(y_true=y_true, y_pred=y_pred, average='micro'),
        'f1_macro': f1_score(y_true=y_true, y_pred=y_pred, average='macro'),
        'f1_weighted': f1_score(y_true=y_true, y_pred=y_pred, average='weighted'),
    }

    if print_results:
        log("Prediction Evaluation", header=True)
        log(f"Accuracy: {results['accuracy']}")
        log(f"F1 Score (Macro): {results['f1_macro']}")
        log(f"F1 Score (Micro): {results['f1_micro']}")
        log(f"F1 Score (Weighted): {results['f1_weighted']}")

    return results
def train_with_k_fold(k=10):
    """Train and evaluate the module-level ``model`` over ``k`` folds.

    For each fold produced by ``k_fold_indicies`` the model is trained on
    the training indices, its history is plotted, and predictions on the
    validation indices are scored with ``eval_predictions``.
    """
    folds = k_fold_indicies(data, k=k)
    for fold_no, (train_idx, val_idx) in enumerate(folds):
        log(f"Fold {fold_no}", header=True)

        # TODO: Need to create a new model? or else it just continues training
        # NOTE(review): the same `model` object is used for every fold, so
        # later folds appear to start from already-trained weights --
        # confirm whether that is intended.
        train_headlines = data.headlines[train_idx]
        train_bodies = data.bodies[train_idx]
        train_stances = data.stances[train_idx]

        val_headlines = data.headlines[val_idx]
        val_bodies = data.bodies[val_idx]
        val_stances = data.stances[val_idx]

        # NOTE(review): "{Bidirectional}BIDIR)" looks unintended (stray
        # ")" and a name that may not be a config value) -- verify.
        run_name = (f"{Bidirectional}BIDIR){NUM_LSTM_UNITS}LSTM"
                    f"-{NUM_DENSE_HIDDEN}DENSE-{DROPOUT}DOUT-{NUM_EPOCHS}EPOCHS-FOLD{fold_no}")

        # val_split takes a share of the training data for validation; the
        # held-out fold is not passed to train() at all.
        history = model.train(titles=train_headlines,
                              bodies=train_bodies,
                              labels=train_stances,
                              epochs=NUM_EPOCHS,
                              seq_len=SEQ_LEN,
                              batch_size=BATCH_SIZE,
                              val_split=TRAIN_VAL_SPLIT,
                              num_classes=NUM_CLASSES,
                              logs_name=run_name)

        plot_keras_history(history, True)

        raw_predictions = model.predict(titles=val_headlines,
                                        bodies=val_bodies,
                                        seq_len=SEQ_LEN,
                                        batch_size=BATCH_SIZE)
        # Turn each score vector into the index of its highest entry.
        predicted_labels = [np.argmax(scores) for scores in raw_predictions]

        eval_predictions(y_true=val_stances, y_pred=predicted_labels, print_results=True)
def sendMsg(self, root_url):
    """Send the user a forum private message with their login details.

    The password from ``self.getPassword()`` is encrypted into a token
    with ``self.cf`` and embedded in a login link built from ``root_url``.
    The message is submitted through ``self.browser``.

    Args:
        root_url: Base URL of this application, with or without a trailing
            slash.
    """
    password = self.getPassword()
    token = self.cf.encrypt(password)

    # Avoid a double slash when the root already ends with one. endswith()
    # also copes with an empty string, which indexing [-1] did not.
    loginPath = "login" if root_url.endswith("/") else "/login"
    url = "{}{}".format(root_url, loginPath)

    # The token is a usable login credential, so it is kept out of the log.
    log("Sending code to {username} with url: {url}/{username}/<token redacted>".format(
        username=self.username, url=url))

    # NOTE(review): whitespace inside the message is reproduced exactly as
    # it appears in the source of this block -- confirm the intended line
    # breaks.
    body = (
        "Dear {username}, "
        "Your login password is {password}. "
        "Click here to login: {url}/{username}/{token} "
        "Please change your password after you login. "
        "Thank you. "
        "WeBump! "
    ).format(url=url, username=self.username, password=password, token=token)

    b = self.browser
    b.visit("https://forum.lowyat.net/index.php?act=Msg&CODE=04")
    b.fill('entered_name', self.username)
    b.fill('msg_title', 'WeBump! credentials')
    b.fill('Post', body)
    submitbtn = b.find_by_name("submit")
    submitbtn.click()
    log("Successfully sent code to {}".format(self.username))
def balance_stances(df):
    """Trim the agree/disagree/discuss groups to equal size and shuffle.

    Each group is cut to the size of the smallest one, so the returned
    frame holds the same number of rows per stance. Rows with any other
    stance are dropped.
    """
    stance_keys = (AGREE_KEY, DISAGREE_KEY, DISCUSS_KEY)
    groups = [df[df['Stance'] == key] for key in stance_keys]

    # The smallest group decides how many rows every group may keep.
    max_index = min(len(group.index) for group in groups)
    log(f"Max index is: {max_index}")

    trimmed = [group[0:max_index] for group in groups]
    # sample(frac=1) returns all rows in random order.
    return pd.concat(trimmed).sample(frac=1)
def plot_confusion_matrix(y_true, y_pred, classes, normalize=False, title=None, cmap=plt.cm.Blues):
    """Log and plot the confusion matrix of ``y_pred`` against ``y_true``.

    With ``normalize=True`` every row is divided by its sum, so cells show
    the share of each true class. ``classes`` supplies the tick labels. A
    default title is chosen when none is given. The figure is displayed
    with ``plt.show()``; nothing is returned.
    """
    default_title = ('Normalized confusion matrix'
                     if normalize else 'Confusion matrix, without normalization')
    if not title:
        title = default_title

    cm = confusion_matrix(y_true, y_pred)
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        cm = cm.astype('float') / row_totals
        u.log("Normalized confusion matrix")
    else:
        u.log('Confusion matrix, without normalization')
    u.log(cm)

    fig, ax = plt.subplots()
    image = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.figure.colorbar(image, ax=ax)

    n_rows, n_cols = cm.shape[0], cm.shape[1]
    # Show every tick and label it with the matching class name.
    ax.set(xticks=np.arange(n_cols),
           yticks=np.arange(n_rows),
           xticklabels=classes,
           yticklabels=classes,
           title=title,
           ylabel='True label',
           xlabel='Predicted label')

    # Rotate the x tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor")

    # Write each cell's value on top of it; text is white on cells above
    # half the maximum and black otherwise.
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for row in range(n_rows):
        for col in range(n_cols):
            cell = cm[row, col]
            text_color = "white" if cell > thresh else "black"
            ax.text(col, row, format(cell, fmt),
                    ha="center", va="center", color=text_color)

    fig.tight_layout()
    plt.show()
def delete(self, post_id):
    """Mark a post as deleted, but only if the requestor owns it.

    The owner's username is looked up first; when it matches
    ``self.requestor`` case-insensitively the post gets ``deleted=1`` and
    ``status=0``. An unknown ``post_id`` is logged and ignored.
    """
    log("delete post_id {}".format(post_id))
    owners = self.db.query(
        "SELECT u.username FROM post p, user u WHERE u.user_id=p.user_id AND post_id=:post_id",
        False,
        post_id=post_id)
    try:
        post_owner = owners[0].username
    except IndexError:
        # NOTE(review): assumes indexing an empty result raises IndexError
        # -- confirm against the db wrapper in use.
        log("post_id {} not found".format(post_id))
        return

    log("post_owner is {}".format(post_owner))
    if post_owner.lower() == self.requestor.lower():
        self.db.query(
            "UPDATE post SET deleted=1, status=0 WHERE post_id=:post_id",
            False,
            post_id=post_id)
        log("deleted")
def add(self, url):
    """Register the topic at ``url`` for bumping if the requestor owns it.

    The post id is extracted with ``self.process_id``, the topic page is
    fetched to learn its title and owner, and, when the owner matches
    ``self.requestor`` case-insensitively, the post is inserted (or
    un-deleted if it already exists).

    Returns:
        A human-readable status string describing the outcome.
    """
    post_id = self.process_id(url)
    log("post_id is {}".format(post_id))

    if not post_id:
        # Without an id there is no page worth fetching, so stop here.
        status = "unable to process url {}".format(url)
        log(status)
        return status

    post_title, post_owner = self.getPostInfo(post_id)
    if post_owner.lower() != self.requestor.lower():
        status = "post owner is {}, not you, {}".format(post_owner, self.requestor)
        log(status)
        return status

    status = "Verified success, will bump {}{} for {}!".format(
        self.topic_url, post_id, self.requestor)
    now = int(time.time())
    uname = self.requestor.lower()
    try:
        # Values are bound as named parameters, as the other queries in
        # this file do. The title comes from a remote page, so it must
        # never be spliced into the SQL text (a single quote used to break
        # the statement).
        # NOTE(review): post_id is now bound with its Python type rather
        # than formatted as a bare literal -- confirm the column accepts it.
        self.db.query(
            "INSERT OR IGNORE INTO post (post_id, title, url, count, status, user_id, created_at, updated_at) "
            "VALUES (:pid, :title, :post_url, 0, 1, "
            "(SELECT user_id FROM user WHERE LOWER(username)=:uname), :t, :t)",
            pid=post_id,
            title=post_title,
            post_url="{}{}".format(self.topic_url, post_id),
            uname=uname,
            t=now)
        # Re-activate the row in case it existed and was marked deleted.
        self.db.query(
            "UPDATE post SET deleted=0, updated_at=:t WHERE post_id=:pid "
            "AND user_id=(SELECT user_id FROM user WHERE LOWER(username)=:uname)",
            pid=post_id,
            uname=uname,
            t=now)
    except Exception:
        status = "Error inserting post"
        log(status)
    return status
def scheduler(post_id=None):
    """Assign a new random ``next_execution`` to active posts.

    Selects posts with ``status=1`` (optionally restricted to one
    ``post_id``) and, for each whose ``account_id`` is 1, stores the value
    returned by ``randomNextExecution()``.

    Args:
        post_id: When given, only this post is considered.
    """
    log("start scheduler")

    sql = "SELECT * FROM post WHERE status=1"
    params = {}
    if post_id:
        # Bound as a parameter instead of being formatted into the SQL
        # text, matching the UPDATE below.
        sql += " AND post_id = :post_id"
        params["post_id"] = post_id

    poster = Post()
    posts = poster.db.query(sql, **params)
    for post in posts:
        if post.account_id == 1:
            next_execution = randomNextExecution()
            log("update post_id {}".format(post.post_id))
            poster.db.query(
                "UPDATE post SET next_execution=:next_execution WHERE user_id=:user_id and post_id=:post_id AND status=1;",
                False,
                next_execution=next_execution,
                user_id=post.user_id,
                post_id=post.post_id)
    log("end scheduler")


if __name__ == "__main__":
    os.chdir(app_src_path)
    scheduler()
def join_mailing_list():
    """Add the submitted email to the mailing list and related services.

    Reads the form fields, optionally records the wallet address with
    ``insight``, upserts the mailing-list entry, and, for contacts that
    are new, sends the welcome email (unless this is a backfill) and adds
    the contact to SendGrid.

    Returns:
        A JSON response with ``success`` and a ``message``.
    """
    form = request.form
    if 'email' not in form:
        return jsonify(success=False, message=gettext("Missing email"))

    email = form['email']
    email_pattern = r"(^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$)"
    if re.match(email_pattern, email) is None:
        return jsonify(success=False, message=gettext("Invalid email"))

    # Optional fields; empty values are normalized to None.
    eth_address = form.get('eth_address') or None
    first_name = form.get('first_name') or None
    last_name = form.get('last_name') or None
    full_name = form.get('name') or None
    if not full_name and (first_name or last_name):
        # Build the full name from whichever parts were supplied.
        full_name = ' '.join(filter(None, (first_name, last_name)))
    phone = form.get('phone') or None
    ip_addr = form.get('ip_addr') or get_real_ip()
    country_code = form.get('country_code') or get_country(ip_addr)
    dapp_user = 1 if 'dapp_user' in form else 0
    investor = 1 if 'investor' in form else 0
    # Indicates the request was made from an internal backfill script.
    backfill = form.get('backfill') or None

    new_user = False
    log('Updating mailing list for', email, eth_address)

    try:
        if eth_address:
            # Add an entry to the eth_contact DB table.
            log('Adding to wallet insights')
            insight.add_contact(address=eth_address,
                                dapp_user=dapp_user,
                                investor=investor,
                                name=full_name,
                                email=email,
                                phone=phone,
                                country_code=country_code)

        # Add an entry to the email_list table.
        log('Adding to mailing list')
        new_contact = mailing_list.add_contact(email, first_name, last_name, ip_addr, country_code)

        # The welcome email is skipped for backfills.
        if new_contact and not backfill:
            log('Sending welcome email')
            mailing_list.send_welcome(email)

        # Only new contacts are added to the Sendgrid contact list.
        if new_contact:
            new_user = True
            log('Adding to Sendgrid contact list')
            mailing_list.add_sendgrid_contact(email=email,
                                              full_name=full_name,
                                              country_code=country_code,
                                              dapp_user=dapp_user)
    except Exception as err:
        log('Failure: %s' % err)
        return jsonify(success=False, message=str(err))

    if new_user:
        return jsonify(success=True, message=gettext('Thanks for signing up!'))
    return jsonify(success=True, message=gettext("You're already registered!"))
from util.ip2geo import get_country from util.misc import sort_language_constants, get_real_ip, concat_asset_files, log import google.oauth2.credentials import google_auth_oauthlib.flow import googleapiclient.discovery import json # Translation: change path of messages.mo files app.config['BABEL_TRANSLATION_DIRECTORIES'] = '../translations' babel = Babel(app) recaptcha = ReCaptcha(app=app) if not recaptcha.is_enabled: log("Warning: recaptcha is not is_enabled") selected_lang = "" @app.before_request def beforeRequest(): """ Processing of URL before any routing """ # Force https on prod if constants.HTTPS: if not request.url.startswith( 'https') and 'ogn.dev' not in request.url_root: return redirect(request.url.replace('http', 'https', 1)) selected_lang = None if request.view_args and 'lang_code' in request.view_args: