def post(self):
    mongo_hashtags = len(Collection.hashtags())
    if mongo_hashtags >= 10:
        return {
            "error": "MongoDB max capacity (10 collections) reached. "
                     "Remove some collections first."
        }

    # Upload tweets to MongoDB
    mongodb = Collection()
    keyword = request.form.get('trend')
    if not keyword.startswith('#'):
        keyword = '#' + keyword
    count = int(request.form.get('count'))

    # Prevent overpopulating by deleting previously loaded tweets
    Collection.delete_by_hashtag(hashtag=keyword)

    for tweet in tweepy.Cursor(twitter_api.search, q=keyword).items(count):
        data = {
            'text': tweet.text,
            'hashtag': keyword,
            'created_at': tweet.created_at,
            'retweet_count': tweet.retweet_count,
        }
        try:
            mongodb.insert_data(data)
        except Exception:
            # Skip tweets that cannot be stored
            pass

    results = []
    for tweet in tweepy.Cursor(twitter_api.search, q=keyword).items(count):
        try:
            results.append({
                "user": tweet.user.name[:25],
                "text": tweet.text,
                "created_at": str(tweet.created_at),
                "retweet_count": tweet.retweet_count,
                "keyword": keyword,
            })
        except Exception:
            # Skip tweets with missing or malformed fields
            pass
    return results
def add_collection():
    global our_cache
    coll_address = str(uuid.uuid1())
    doc_hash_1 = str(uuid.uuid1())
    coll = Collection(
        title="Test",
        description="This is a collection!",
        address=coll_address,
        btc=str(uuid.uuid1()),
        keywords=[],
        documents=[
            Document(
                collection_address=coll_address,
                description="Test document A",
                hash=doc_hash_1,
                title="Test A",
            ),
        ],
        creation_date=datetime.datetime.now(),
        oldest_date=datetime.datetime.now(),
        latest_broadcast_date=datetime.datetime.now(),
    )
    our_cache.insert_new_collection(coll)
    collections.update_hash(coll)
    our_cache.session.commit()
    return coll
def add_collection(self):
    our_cache = self.our_cache
    coll_address = str(uuid.uuid1())
    doc_hash_1 = str(uuid.uuid1())
    doc_hash_2 = str(uuid.uuid1())
    coll = Collection(
        title="Test",
        description="This is a collection!",
        address=coll_address,
        btc="123456789",
        keywords=[],
        documents=[
            Document(
                description="Test document A",
                hash=doc_hash_1,
                title="Test A",
            ),
            Document(
                description="Test document B",
                hash=doc_hash_2,
                title="Test B",
            ),
        ],
        creation_date=datetime.datetime.now(),
        oldest_date=datetime.datetime.now(),
        latest_broadcast_date=datetime.datetime.now(),
    )
    our_cache.insert_new_collection(coll)
    collections.update_hash(coll)
    our_cache.session.commit()
    return coll
def manage_db(): """Manage Databases View""" mongo_hashtags = Collection.hashtags() sqlite_hashtags = TweetsModel.distinct_hashtags() return render_template('manage_db/dashboard.html', mongo_hashtags=mongo_hashtags, sqlite_hashtags=sqlite_hashtags)
def post(self):
    hashtag = request.form.get('hashtag_cleaning')
    if hashtag is None:
        flash("hashtag was not specified")
        return redirect('trendsearch')

    # Prevent overpopulating
    TweetsModel.delete_by_hashtag(hashtag=hashtag)

    # Get chosen hashtag tweets
    hashtag_tweets = Collection.find_by_hashtag(hashtag=hashtag)

    # Preprocess text for future sentiment analysis
    text = [element['text'] for element in hashtag_tweets]
    corpus = []
    ps = PorterStemmer()
    for raw_tweet in text:
        try:
            tweet = re.sub('[^a-zA-Z]', ' ', raw_tweet)  # keep letters only
            tweet = tweet.lower().split()
            tweet = [ps.stem(word) for word in tweet if word not in stopwords]
            tweet = ' '.join(tweet)
            corpus.append(tweet)
            new_tweet = TweetsModel(hashtag=hashtag, tweet=tweet)
            new_tweet.save_to_db()
        except Exception:
            # Skip tweets that fail preprocessing or persistence
            continue
    return redirect('manage_db')
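# A minimal, standalone sketch of the same preprocessing pipeline used in the
# handler above, assuming NLTK's PorterStemmer and English stopword corpus are
# available (the function name and arguments here are illustrative, not part
# of the original module).
import re

from nltk.corpus import stopwords as nltk_stopwords
from nltk.stem.porter import PorterStemmer


def clean_tweet(raw_text, stop_words=None, stemmer=None):
    """Strip non-letters, lowercase, drop stopwords, and stem each token."""
    # Requires the NLTK stopwords corpus to be downloaded beforehand.
    stop_words = stop_words or set(nltk_stopwords.words('english'))
    stemmer = stemmer or PorterStemmer()
    tokens = re.sub('[^a-zA-Z]', ' ', raw_text).lower().split()
    return ' '.join(stemmer.stem(w) for w in tokens if w not in stop_words)


# Example usage: clean_tweet("Loving the #sunset today!!!") returns a
# lowercased, stemmed string with stopwords and punctuation removed.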
def home(language='es'):
    site = Site(language, splitByLanguage('Inicio | Home', language))
    site.blowUp = True
    collection = Collection(site)
    items = collection.asItems()
    return render_template('home.html', site=site, items=items)
def index():
    if request.method == 'GET':
        return jsonify(db.session.query(Collection.name).all()), 200
    elif request.method == 'POST':
        name = make_token()
        collection = Collection(name=name, title=name,
                                creator=flask_praetorian.current_user().username)
        db.session.add(collection)
        db.session.commit()
        return jsonify({'location': f'/collections/{name}/'}), 201
def __init__(
    self,
    collection_name: str,
    stopwords_list,
    lemmatizer,
):
    self.collection = Collection(collection_name, stopwords_list, lemmatizer)
    self.stopwords = stopwords_list
    self.lemmatizer = lemmatizer
def manage_db(): """Manage Databases View""" for attempt in range(0, 3): try: mongo_hashtags = Collection.hashtags() except AutoReconnect: time.sleep(2) sqlite_hashtags = TweetsModel.distinct_hashtags() return render_template('manage_db/dashboard.html', mongo_hashtags=mongo_hashtags, sqlite_hashtags=sqlite_hashtags)
def __init__(
    self,
    collection_name: str,
    stopwords_list,
    text_transformer: TextTransformer,
    weighting_model: str = "tw-idf",
):
    self.collection = Collection(collection_name, stopwords_list,
                                 weighting_model, text_transformer)
    self.weighting_model = weighting_model
    self.stopwords = stopwords_list
    self.__text_transformer = text_transformer
    self.__cos = CosineSimilarity(dim=0, eps=1e-6)
def collection(name):
    if not Collection.exists(name):
        return jsonify({'status': 'Resource does not exist.'}), 404
    if request.method == 'GET':
        return annotations_for(name), 200
    elif request.method == 'PUT':
        if not request.is_json:
            return jsonify({'status': 'Expected JSON.'}), 400
        data = request.get_json()
        annotation = Annotation(collection=name, data=data)
        if not validate_and_commit_json(name, data):
            return jsonify({'status': 'Poorly formatted JSON.'}), 400
        return jsonify({'status': 'Annotation data added.'}), 202
def setUp(self):
    self.controller = Controller()
    self.cache = self.controller.cache
    self.address = 'ffafaf'
    self.test_collection_evil = Collection(
        title="Test multiple33333",
        description="This is a collection! with multiple docs222",
        address=self.address,
        btc="123456789",
        keywords=[
            Keyword(name="Keyword A", id=1199),
            Keyword(name="Keyword c", id=1214),
        ],
        documents=[
            Document(
                description="Test document Z",
                hash="zzzzzzzz",
                title="Test Z",
                accesses=0,
                filename="joe.txt",
                collection_address="BM-2cSrapXpgDTFD8AyDmU1BGifNkB2Z6X9k8",
            ),
            Document(
                description="Test document B",
                hash='gdssgsdg',
                title="Test B",
                accesses=3,
                filename="gile.txt",
                collection_address="BM-2cSrapXpgDTFD8AyDmU1BGifNkB2Z6X9k8",
            ),
            Document(
                description="Test document Bddd",
                hash='afff',
                title="Test B",
                accesses=3,
                filename="gile.txt",
                collection_address="BM-2cSrapXpgDTFD8AyDmU1BGifNkB2Z6X9k8",
            ),
        ],
        creation_date=datetime.datetime.now(),
        oldest_date=datetime.datetime.now(),
        latest_broadcast_date=datetime.datetime.now(),
        latest_btc_tx="btctx1",
        oldest_btc_tx="btctx12",
        accesses=2,
        votes=3,
        votes_last_checked=datetime.datetime.now(),
    )
def item(language, type, country, year, item):
    site = Site(language, item)
    site.blowUp = True
    site.lightSlider = True
    collection = Collection(site)
    item = Item(
        language,
        collection.find(type + '/' + country + '/' + year + '/' + item, 'Link'))
    site.title = item.name()
    site.permalink = item.link()
    return render_template('item.html', site=site, item=item)
def test_as_item_returns_expected_results(self):
    data = googleData()
    site = Site('es', '')
    collection = Collection(site)
    collection.googleData = data
    # Assert data is correctly turned into a list of Items.
    assert collection.asItems()[1].name() == 'Nombre del objeto'
    # Assert data is skipped if all the fields are empty.
    assert collection.asItems()[0].name() == 'Nombre del objeto 2'
    # Assert empty list is returned when there's no data.
    collection.googleData = []
    assert collection.asItems() == []
def setUp(self):
    self.controller = Controller()
    self.address = self.controller.connection.create_address('Controller Test address', True)
    doc_hash_1 = str(uuid.uuid1())
    doc_hash_2 = str(uuid.uuid1())
    self.test_collection = Collection(
        title="Test",
        description="This is a collection!",
        address=self.address,
        btc="123456789",
        keywords=[
            Keyword(name="Keyword A"),
            Keyword(name="Keyword B"),
        ],
        documents=[
            Document(
                description="Test document A",
                hash=doc_hash_1,
                title="Test A",
                accesses=0,
                filename="joe.txt",
                collection_address="afgagahhsgh",
            ),
            Document(
                description="Test document B",
                hash=doc_hash_2,
                title="Test B",
                accesses=3,
                filename="gile.txt",
                collection_address="afgagasghhhss",
            ),
        ],
        creation_date=datetime.datetime.now(),
        oldest_date=datetime.datetime.now(),
        latest_broadcast_date=datetime.datetime.now(),
        latest_btc_tx="btctx1",
        oldest_btc_tx="btctx12",
        accesses=2,
        votes=3,
        votes_last_checked=datetime.datetime.now(),
    )
    self.test_signature = Signature(pubkey='itsakey', address=self.address)
def setUp(self):
    self.cache = Cache()
    self.cache.reset_database()
    self.collection1 = Collection(
        title="First Collection",
        btc="btc",
        address="bm-first",
        description="description",
        keywords=[],
        documents=[],
        latest_broadcast_date=datetime.datetime.today(),
        creation_date=datetime.datetime.today(),
        oldest_date=datetime.datetime.today(),
        latest_btc_tx="",
        oldest_btc_tx="",
        accesses=0,
        votes=0,
        votes_last_checked=datetime.datetime.today())
    self.cache.insert_new_collection(self.collection1)
def test_find_item_returns_expected_results(self):
    data = googleData()
    site = Site('es', '')
    collection = Collection(site)
    collection.googleData = data
    # Assert expected item by Spanish name.
    assert collection.find('Nombre del objeto 2') == data[2]
    # Assert expected item by English name.
    assert collection.find('Item name 2') == data[2]
    # Assert empty dict by wrong name.
    assert collection.find('Wrong') == {}
    # Assert expected value from a different field.
    assert collection.find('nombre-del-objeto-2', 'Link') == data[2]
    # Assert empty dict from a different field.
    assert collection.find('Wrong', 'Link de la Ceca | Mint\'s Link') == {}
def put_collection(address_password, title, description, keywords, btc):
    """
    Create a collection in the local cache.

    :param address_password: The password with which to protect the collection.
        Should be unique and at least 20 characters for optimal security; it
        deterministically generates the unique collection ID.
    :param title: The title of the created collection
    :param description: The description of the created collection
    :param keywords: Comma-separated keywords for the resulting collection
    :param btc: The Bitcoin address of the resulting collection
    """
    bitmessage_connection = Bitmessage()
    cache = Cache()
    address = bitmessage_connection.create_address(address_password)
    input_keywords = [Keyword(name=x) for x in keywords.split(",")]
    keywords = []
    for key in input_keywords:
        db_key = cache.get_keyword_by_name(key.name)
        if db_key is not None:
            keywords.append(db_key)
        else:
            keywords.append(key)
    collection = Collection(
        title=title,
        description=description,
        address=address,
        accesses=0,
        votes=0,
        btc=btc,
        keywords=keywords,
        documents=[],
        creation_date=datetime.datetime.now(),
        oldest_date=datetime.datetime.now(),
        votes_last_checked=datetime.datetime.now(),
        latest_broadcast_date=datetime.datetime.now()
    )
    cache.insert_new_collection(collection)
    print("Collection inserted with address/ID " + address)
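# A hypothetical invocation of put_collection above. The argument values are
# illustrative only (not taken from the original project), and the call assumes
# a reachable Bitmessage instance and an initialized local cache.
put_collection(
    address_password="correct horse battery staple 42",  # >= 20 chars recommended
    title="Example Collection",
    description="A collection created from the command line",
    keywords="research,papers,example",
    btc="1ExampleBitcoinAddressXXXXXXXXXXXX",
)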
def setUp(self):
    cache = Cache()
    cache.reset_database()
    collection1 = Collection(
        title="First Collection",
        btc="btc",
        address="bm-first",
        description="description",
        keywords=[],
        documents=[],
        latest_broadcast_date=datetime.datetime.today(),
        creation_date=datetime.datetime.today(),
        oldest_date=datetime.datetime.today(),
        latest_btc_tx="",
        oldest_btc_tx="",
        accesses=0,
        votes=0,
        votes_last_checked=datetime.datetime.today())
    cache.insert_new_collection(collection1)
    collection2 = Collection(
        title="Second Collection",
        btc="btc",
        address="bm-second",
        description="description",
        keywords=[],
        documents=[],
        latest_broadcast_date=datetime.datetime.today() - datetime.timedelta(days=3),
        creation_date=datetime.datetime.today(),
        oldest_date=datetime.datetime.today(),
        latest_btc_tx="",
        oldest_btc_tx="",
        accesses=0,
        votes=0,
        votes_last_checked=datetime.datetime.today())
    cache.insert_new_collection(collection2)
    collection3 = Collection(
        title="Third Collection",
        btc="btc",
        address="bm-third",
        description="description",
        keywords=[],
        documents=[],
        latest_broadcast_date=datetime.datetime.today() - datetime.timedelta(days=1),
        creation_date=datetime.datetime.today(),
        oldest_date=datetime.datetime.today(),
        latest_btc_tx="",
        oldest_btc_tx="",
        accesses=0,
        votes=0,
        votes_last_checked=datetime.datetime.today())
    cache.insert_new_collection(collection3)
    collection4 = Collection(
        title="Fourth Collection",
        description="description",
        btc="btc",
        address="bm-fourth",
        keywords=[],
        documents=[],
        latest_broadcast_date=datetime.datetime.today() - datetime.timedelta(days=6),
        creation_date=datetime.datetime.today(),
        oldest_date=datetime.datetime.today(),
        latest_btc_tx="",
        oldest_btc_tx="",
        accesses=0,
        votes=0,
        votes_last_checked=datetime.datetime.today())
    cache.insert_new_collection(collection4)
def form(language='es'):
    site = Site(
        language,
        splitByLanguage(
            'Añadir un Objeto a la Colección | Add an Item to the Collection',
            language))
    collection = Collection(site)
    # Ensure the form is being sent.
    if request.method == 'POST':
        # Upload the images to SmartFile first; once we have the URLs for the
        # images, update our Google Spreadsheet to include those URLs.
        if not request.files['obverse'] or not request.files['reverse']:
            return render_template('form-error.html', site=site)
        else:
            # Build the folder structure to store the image.
            filePath = 'Items/' + splitByLanguage(
                request.form.get('type'), 'en') + '/' + splitByLanguage(
                    request.form.get('country'), 'en') + '/' + splitByLanguage(
                        request.form.get('date'), 'en')
            # Create the folder if needed.
            collection.smartFileClient.put('/path/oper/mkdir/' + filePath)
            # Generate the file name based on the item name.
            fileName = stringToURL(
                splitByLanguage(request.form.get('name'), 'en'))
            # Rename the uploaded files so they are stored under the correct name.
            obverse = request.files['obverse']
            obverse.filename = fileName + '-obverse.jpg'
            reverse = request.files['reverse']
            reverse.filename = fileName + '-reverse.jpg'
            # Upload the obverse and reverse images.
            collection.smartFileClient.post('/path/data/' + filePath,
                                            file=(obverse.filename, obverse))
            collection.smartFileClient.post('/path/data/' + filePath,
                                            file=(reverse.filename, reverse))
            # Generate the href for the images to be saved in the Google Spreadsheet.
            obverseURL = collection.smartFileClient.post(
                '/link', path=filePath + '/' + obverse.filename)
            reverseURL = collection.smartFileClient.post(
                '/link', path=filePath + '/' + reverse.filename)
            if 'href' in obverseURL and 'href' in reverseURL:
                # Insert our data into the Google Spreadsheet.
                insertData = [
                    request.form.get('type'),
                    request.form.get('name'),
                    obverseURL['href'] + request.files['obverse'].filename,
                    reverseURL['href'] + request.files['reverse'].filename,
                    request.form.get('country'),
                    request.form.get('denomination'),
                    request.form.get('date'),
                    request.form.get('diameter'),
                    request.form.get('composition'),
                    request.form.get('series'),
                    request.form.get('serial'),
                    request.form.get('grading'),
                    request.form.get('value'),
                    request.form.get('cost'),
                    prepareItemLink(request.form.get('type'),
                                    request.form.get('country'),
                                    request.form.get('date'),
                                    request.form.get('name')),
                    request.form.get('mint'),
                ]
                rowCount = len(collection.googleData)
                # Insert at rowCount + 2: +1 for the next free row and +1 to
                # account for the header row.
                collection.googleSheet.insert_row(insertData, rowCount + 2)
                return render_template('form-success.html', site=site)
            else:
                return render_template('form-error.html', site=site)
    return render_template('form.html', site=site)
def _cache_collection(self, payload, message):
    """
    Checks whether this collection is already in the cache. If it is, the
    cached collection is updated with the new data; otherwise a new
    collection is created and cached.

    :param payload: the contents of the FJ_message
    :param message: the Bitmessage message containing an FJ_message
    """
    # Grab the text representations of the documents and keywords and rebuild them.
    cached_collection = self.cache.get_collection_with_address(
        payload["address"])
    if cached_collection is None:
        collection_model = Collection(
            title=payload["title"],
            description=payload["description"],
            address=payload["address"],
            btc=payload["btc"],
            creation_date=datetime.datetime.strptime(
                payload["creation_date"], "%A, %d. %B %Y %I:%M%p"),
            oldest_date=datetime.datetime.strptime(
                payload["oldest_date"], "%A, %d. %B %Y %I:%M%p"),
            latest_broadcast_date=datetime.datetime.strptime(
                payload["latest_broadcast_date"], "%A, %d. %B %Y %I:%M%p"),
            votes=payload['votes'],
            votes_last_checked=datetime.datetime.strptime(
                payload["votes_last_checked"], "%A, %d. %B %Y %I:%M%p"),
        )
        self._build_docs_keywords(payload, collection_model)
        signature = Signature(pubkey=message["pubkey"],
                              signature=message["signature"],
                              address=payload["address"])
        try:
            self.cache.insert_new_collection(collection_model)
            self.cache.insert_new_collection(signature)
            self._hash_document_filenames(collection_model.documents,
                                          collection_model)
            self.download_threads.add(
                self._download_documents(collection_model.title,
                                         collection_model.documents))
            print("Cached New Collection")
            return True
        except IntegrityError as m:
            print(m)
            return False
    else:
        cached_collection.keywords = []
        cached_sig = self.cache.get_signature_by_address(payload["address"])
        cached_sig.pubkey = message["pubkey"]
        cached_sig.signature = message["signature"]
        cached_collection.title = payload["title"]
        cached_collection.description = payload["description"]
        cached_collection.address = payload["address"]
        cached_collection.btc = payload["btc"]
        cached_collection.documents = []
        cached_collection.creation_date = datetime.datetime.strptime(
            payload["creation_date"], "%A, %d. %B %Y %I:%M%p")
        cached_collection.oldest_date = datetime.datetime.strptime(
            payload["oldest_date"], "%A, %d. %B %Y %I:%M%p")
        cached_collection.latest_broadcast_date = datetime.datetime.strptime(
            payload["latest_broadcast_date"], "%A, %d. %B %Y %I:%M%p")
        cached_collection.votes = payload['votes']
        cached_collection.votes_last_checked = datetime.datetime.strptime(
            payload["votes_last_checked"], "%A, %d. %B %Y %I:%M%p")
        self._build_docs_keywords(payload, cached_collection)
        try:
            self.cache.insert_new_collection(cached_collection)
            self.cache.insert_new_collection(cached_sig)
            self._hash_document_filenames(cached_collection.documents,
                                          cached_collection)
            self.download_threads.add(
                self._download_documents(cached_collection.title,
                                         cached_collection.documents))
            print("Cached Updated Collection")
            return True
        except IntegrityError as m:
            print(m)
            return False
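# Illustrative shape of the `payload` dict consumed by _cache_collection above.
# The keys and the date format string mirror the fields parsed in that method;
# the concrete values are made up for demonstration.
example_payload = {
    "title": "Example Collection",
    "description": "A collection received over Bitmessage",
    "address": "BM-2cExampleCollectionAddress",
    "btc": "1ExampleBitcoinAddressXXXXXXXXXXXX",
    "creation_date": "Tuesday, 14. May 2024 03:05PM",        # "%A, %d. %B %Y %I:%M%p"
    "oldest_date": "Tuesday, 14. May 2024 03:05PM",
    "latest_broadcast_date": "Tuesday, 14. May 2024 03:05PM",
    "votes": 0,
    "votes_last_checked": "Tuesday, 14. May 2024 03:05PM",
    # Document and keyword entries are rebuilt by _build_docs_keywords(...).
}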
def post(self):
    hashtag = request.form.get('hashtag_collection')
    Collection.delete_by_hashtag(hashtag=hashtag)
    return redirect('manage_db')