def post(self):
    """Load tweets for a hashtag into MongoDB and return them.

    Reads ``trend`` (the hashtag, with or without a leading ``#``) and
    ``count`` (how many tweets to fetch) from the submitted form. Tweets
    previously stored for the hashtag are deleted first; every tweet
    returned by the Twitter search is then stored and echoed back.

    Returns:
        A list with one dict per fetched tweet (``user``, ``text``,
        ``created_at``, ``retweet_count``, ``keyword``), or a dict with an
        ``error`` key when the request is refused.
    """
    import logging

    logger = logging.getLogger(__name__)

    if len(Collection.hashtags()) >= 10:
        return {
            "error":
            "MongoDB max capacity (10 already reached). Remove some Collections first:"
        }

    # A missing or empty field used to crash on ``keyword[0]``.
    keyword = request.form.get('trend')
    if not keyword:
        return {"error": "No trend was specified."}
    if not keyword.startswith('#'):
        keyword = '#' + keyword

    try:
        count = int(request.form.get('count'))
    except (TypeError, ValueError):
        return {"error": "Count must be a whole number."}

    mongodb = Collection()

    # Prevent overpopulating by deleting previously loaded tweets
    Collection.delete_by_hashtag(hashtag=keyword)

    # A single pass over the search results: the tweets reported back are
    # exactly the ones that were stored, and the API is queried only once.
    results = []
    for tweet in tweepy.Cursor(twitter_api.search, q=keyword).items(count):
        data = {
            'text': tweet.text,
            'hashtag': keyword,
            'created_at': tweet.created_at,
            'retweet_count': tweet.retweet_count,
        }
        try:
            mongodb.insert_data(data)
        except Exception:
            # Best effort: one failed insert must not abort the whole load.
            logger.exception("Could not store tweet for %s", keyword)

        try:
            results.append({
                "user": tweet.user.name[:25],
                "text": tweet.text,
                "created_at": str(tweet.created_at),
                "retweet_count": tweet.retweet_count,
                "keyword": keyword
            })
        except Exception:
            logger.exception("Could not serialise tweet for %s", keyword)

    return results
def add_collection():
    """Create a sample collection with one document and store it.

    The collection is inserted into ``our_cache``, passed to
    ``collections.update_hash`` and the cache session is committed.

    Returns:
        The newly created ``Collection``.
    """
    coll_address = str(uuid.uuid1())
    doc_hash_1 = str(uuid.uuid1())
    coll = Collection(
        title="Test",
        description="This is a collection!",
        address=coll_address,
        btc=str(uuid.uuid1()),
        keywords=[],
        documents=[
            Document(
                # The document refers to its owning collection's address;
                # previously a document hash was passed here by mistake.
                collection_address=coll_address,
                description="Test document A",
                hash=doc_hash_1,
                title="Test A",
            ),
        ],
        creation_date=datetime.datetime.now(),
        oldest_date=datetime.datetime.now(),
        latest_broadcast_date=datetime.datetime.now()
    )
    our_cache.insert_new_collection(coll)
    collections.update_hash(coll)
    our_cache.session.commit()
    return coll
 def add_collection(self):
     """Insert a two-document sample collection into ``self.our_cache``.

     The collection is inserted, handed to ``collections.update_hash`` and
     the cache session is committed before the collection is returned.
     """
     cache = self.our_cache
     address = str(uuid.uuid1())
     first_hash = str(uuid.uuid1())
     second_hash = str(uuid.uuid1())

     sample_documents = [
         Document(description="Test document A", hash=first_hash,
                  title="Test A"),
         Document(description="Test document B", hash=second_hash,
                  title="Test B"),
     ]
     fields = dict(
         title="Test",
         description="This is a collection!",
         address=address,
         btc="123456789",
         keywords=[],
         documents=sample_documents,
         creation_date=datetime.datetime.now(),
         oldest_date=datetime.datetime.now(),
         latest_broadcast_date=datetime.datetime.now(),
     )
     new_collection = Collection(**fields)

     cache.insert_new_collection(new_collection)
     collections.update_hash(new_collection)
     cache.session.commit()
     return new_collection
Esempio n. 4
0
def manage_db():
    """Render the database dashboard with the hashtags of both stores."""
    context = {
        'mongo_hashtags': Collection.hashtags(),
        'sqlite_hashtags': TweetsModel.distinct_hashtags(),
    }
    return render_template('manage_db/dashboard.html', **context)
    def post(self):
        """Clean the stored tweets of a hashtag and save them as TweetsModel rows.

        Reads ``hashtag_cleaning`` from the submitted form. Each tweet text
        found for that hashtag is reduced to lower-case letters, stripped
        of stop words, stemmed and saved. Rows previously saved for the
        hashtag are deleted first.

        Returns:
            A redirect to ``manage_db``, or to ``trendsearch`` when no
            hashtag was submitted.
        """
        import logging

        logger = logging.getLogger(__name__)

        hashtag = request.form.get('hashtag_cleaning')

        # Validate before touching the database: the delete used to run
        # with ``hashtag=None`` ahead of this check.
        if hashtag is None:
            # NOTE(review): if ``redirect`` is Flask's, it does not accept
            # a ``message`` keyword -- confirm which ``redirect`` this is.
            return redirect('trendsearch', message="hashtag was not specified")

        # Prevent overpopulating
        TweetsModel.delete_by_hashtag(hashtag=hashtag)

        # Get chosen hashtag tweets
        hashtag_tweets = Collection.find_by_hashtag(hashtag=hashtag)

        # Preprocess text for future sentiment analysis
        texts = [element['text'] for element in hashtag_tweets]

        stemmer = PorterStemmer()  # one stemmer serves every tweet
        for raw_text in texts:
            try:
                words = re.sub('[^a-zA-Z]', ' ', raw_text).lower().split()
                cleaned = ' '.join(
                    stemmer.stem(word) for word in words
                    if word not in stopwords)
                TweetsModel(hashtag=hashtag, tweet=cleaned).save_to_db()
            except Exception:
                # Best effort: skip the tweet but leave a trace of why.
                logger.exception("Could not clean a tweet for %s", hashtag)

        return redirect('manage_db')
Esempio n. 6
0
def home(language='es'):
    """Render the home page listing every item of the collection."""
    page_title = splitByLanguage('Inicio | Home', language)
    site = Site(language, page_title)
    site.blowUp = True

    items = Collection(site).asItems()
    return render_template('home.html', site=site, items=items)
Esempio n. 7
0
def index():
    """List collection names (GET) or create a new collection (POST).

    A POST names the new collection with a fresh token, records the current
    user as its creator and answers 201 with the collection's location.
    Any other method falls through and returns ``None``.
    """
    method = request.method

    if method == 'GET':
        names = db.session.query(Collection.name).all()
        return jsonify(names), 200

    if method == 'POST':
        token = make_token()
        creator = flask_praetorian.current_user().username
        new_collection = Collection(name=token, title=token, creator=creator)
        db.session.add(new_collection)
        db.session.commit()
        body = {'location': f'/collections/{token}/'}
        return jsonify(body), 201
Esempio n. 8
0
    def __init__(self, collection_name: str, stopwords_list, lemmatizer):
        """Keep the stop words and lemmatizer and build the collection.

        The ``Collection`` is constructed from the collection name, the
        stop-word list and the lemmatizer, in that order.
        """
        self.stopwords = stopwords_list
        self.lemmatizer = lemmatizer
        self.collection = Collection(
            collection_name,
            stopwords_list,
            lemmatizer,
        )
Esempio n. 9
0
def manage_db():
    """Manage Databases View.

    Fetches the MongoDB hashtags, retrying up to three times (two seconds
    apart) when the connection drops, then renders the dashboard together
    with the SQLite hashtags.

    Raises:
        AutoReconnect: if the third attempt fails as well.
    """
    max_attempts = 3
    for attempt in range(1, max_attempts + 1):
        try:
            mongo_hashtags = Collection.hashtags()
            break  # success: do not query again
        except AutoReconnect:
            if attempt == max_attempts:
                # Surface the real failure instead of an unbound-variable
                # error further down.
                raise
            time.sleep(2)
    sqlite_hashtags = TweetsModel.distinct_hashtags()
    return render_template('manage_db/dashboard.html',
                           mongo_hashtags=mongo_hashtags,
                           sqlite_hashtags=sqlite_hashtags)
 def __init__(self, collection_name: str, stopwords_list,
              text_transformer: TextTransformer,
              weighting_model: str = "tw-idf"):
     """Build the collection and keep the scoring configuration.

     The ``Collection`` receives the name, the stop-word list, the
     weighting model and the text transformer, in that order. A
     ``CosineSimilarity`` with ``dim=0`` and ``eps=1e-6`` is stored
     privately.
     """
     self.collection = Collection(
         collection_name,
         stopwords_list,
         weighting_model,
         text_transformer,
     )
     self.stopwords = stopwords_list
     self.weighting_model = weighting_model
     self.__cos = CosineSimilarity(dim=0, eps=1e-6)
     self.__text_transformer = text_transformer
Esempio n. 11
0
def collection(name):
    """Return the annotations of a collection (GET) or add one (PUT).

    Answers 405 when the collection does not exist, 400 when a PUT body is
    not JSON or fails validation, and 202 once the annotation data has
    been committed. Any other method falls through and returns ``None``.
    """
    if not Collection.exists(name):
        # NOTE(review): 405 means "Method Not Allowed"; 404 looks like the
        # intended status. Left unchanged because clients may rely on it.
        return jsonify({'status': 'Resource does not exist.'}), 405

    method = request.method
    if method == 'GET':
        return annotations_for(name), 200
    if method != 'PUT':
        return None

    if not request.is_json:
        return jsonify({'status': 'Expected JSON.'}), 400

    payload = request.get_json()
    # NOTE(review): this instance is never used afterwards -- confirm
    # whether it can be dropped.
    _annotation = Annotation(collection=name, data=payload)
    if validate_and_commit_json(name, payload):
        return jsonify({'status': 'Annotation data added.'}), 202
    return jsonify({'status': 'Poorly formatted JSON.'}), 400
Esempio n. 12
0
 def setUp(self):
     """Create the controller and a three-document test collection.

     Sets ``self.controller``, ``self.cache``, ``self.address`` and
     ``self.test_collection_evil``. The collection is only constructed
     here; this method does not insert it anywhere.
     """
     self.controller = Controller()
     self.cache = self.controller.cache
     self.address = 'ffafaf'
     # Every document is filed under this address, which differs from the
     # collection's own ``self.address``.
     document_address = "BM-2cSrapXpgDTFD8AyDmU1BGifNkB2Z6X9k8"
     self.test_collection_evil = Collection(
         title="Test multiple33333",
         description="This is a collection! with multiple docs222",
         address=self.address,
         btc="123456789",
         keywords=[
             Keyword(name="Keyword A", id=1199),
             Keyword(name="Keyword c", id=1214),
         ],
         documents=[
             Document(
                 description="Test document Z",
                 hash="zzzzzzzz",
                 title="Test Z",
                 accesses=0,
                 filename="joe.txt",
                 collection_address=document_address
             ),
             Document(
                 description="Test document B",
                 hash='gdssgsdg',
                 title="Test B",
                 accesses=3,
                 filename="gile.txt",
                 collection_address=document_address
             ),
             Document(
                 description="Test document Bddd",
                 hash='afff',
                 title="Test B",
                 accesses=3,
                 filename="gile.txt",
                 collection_address=document_address
             ),
         ],
         creation_date=datetime.datetime.now(),
         oldest_date=datetime.datetime.now(),
         latest_broadcast_date=datetime.datetime.now(),
         latest_btc_tx="btctx1",
         oldest_btc_tx="btctx12",
         accesses=2,
         votes=3,
         votes_last_checked=datetime.datetime.now())
Esempio n. 13
0
def item(language, type, country, year, item):
    """Render the detail page of one collection item.

    The item is looked up by its ``Link`` field, built as
    ``type/country/year/item``; the page title and permalink are taken
    from the item that was found.
    """
    site = Site(language, item)
    site.blowUp = True
    site.lightSlider = True

    catalog = Collection(site)
    link = '/'.join((type, country, year, item))
    entry = Item(language, catalog.find(link, 'Link'))

    site.title = entry.name()
    site.permalink = entry.link()

    return render_template('item.html', site=site, item=entry)
    def test_as_item_returns_expected_results(self):
        """``asItems`` converts the sheet rows into ``Item`` objects."""
        rows = googleData()

        coll = Collection(Site('es', ''))
        coll.googleData = rows

        # Rows are converted into a list of Items.
        second_item = coll.asItems()[1]
        assert second_item.name() == 'Nombre del objeto'

        # A row whose fields are all empty is skipped.
        first_item = coll.asItems()[0]
        assert first_item.name() == 'Nombre del objeto 2'

        # No data yields an empty list.
        coll.googleData = []
        assert coll.asItems() == []
Esempio n. 15
0
    def setUp(self):
        """Create the controller, a test address, collection and signature.

        Sets ``self.controller``, ``self.address`` (obtained from the
        controller's connection), ``self.test_collection`` (two keywords,
        two documents) and ``self.test_signature``.
        """
        self.controller = Controller()
        self.address = self.controller.connection.create_address('Controller Test address', True)

        doc_hash_1 = str(uuid.uuid1())
        doc_hash_2 = str(uuid.uuid1())

        self.test_collection = Collection(
            title="Test",
            description="This is a collection!",
            address=self.address,
            btc="123456789",
            keywords=[
                Keyword(name="Keyword A"),
                Keyword(name="Keyword B"),
            ],
            documents=[
                Document(
                    description="Test document A",
                    hash=doc_hash_1,
                    title="Test A",
                    accesses=0,
                    filename="joe.txt",
                    collection_address="afgagahhsgh"
                    ),
                Document(
                    description="Test document B",
                    hash=doc_hash_2,
                    title="Test B",
                    accesses=3,
                    filename="gile.txt",
                    collection_address="afgagasghhhss"
                    ),
            ],
            creation_date=datetime.datetime.now(),
            oldest_date=datetime.datetime.now(),
            latest_broadcast_date=datetime.datetime.now(),
            latest_btc_tx="btctx1",
            oldest_btc_tx="btctx12",
            accesses=2,
            votes=3,
            votes_last_checked=datetime.datetime.now()
        )
        self.test_signature = Signature(pubkey='itsakey',address=self.address)
Esempio n. 16
0
 def setUp(self):
     """Reset the cache database and insert one empty collection.

     The collection is kept as ``self.collection1``.
     """
     self.cache = Cache()
     self.cache.reset_database()

     today = datetime.datetime.today
     fields = dict(
         title="First Cdollection",
         btc="btc",
         address="bm-first",
         description="description",
         keywords=[],
         documents=[],
         latest_broadcast_date=today(),
         creation_date=today(),
         oldest_date=today(),
         latest_btc_tx="",
         oldest_btc_tx="",
         accesses=0,
         votes=0,
         votes_last_checked=today(),
     )
     first = Collection(**fields)
     self.collection1 = first
     self.cache.insert_new_collection(first)
    def test_find_item_returns_expected_results(self):
        """``find`` locates rows by name or by another field."""
        rows = googleData()

        coll = Collection(Site('es', ''))
        coll.googleData = rows

        cases = (
            # Expected item by Spanish name.
            (('Nombre del objeto 2',), 2),
            # Expected item by English name.
            (('Item name 2',), 2),
            # Empty dict by wrong name.
            (('Wrong',), None),
            # Expected value from a different field.
            (('nombre-del-objeto-2', 'Link'), 2),
            # Empty dict from a different field.
            (('Wrong', 'Link de la Ceca | Mint\'s Link'), None),
        )
        for arguments, row_index in cases:
            found = coll.find(*arguments)
            if row_index is None:
                assert found == {}
            else:
                assert found == rows[row_index]
Esempio n. 18
0
def put_collection(address_password, title, description, keywords, btc):
    """Create a collection in the local cache.

    :param address_password: The password with which to protect the
        collection. It is passed to ``Bitmessage.create_address`` to obtain
        the collection address/ID. The original author recommends a unique
        value of at least 20 characters.
    :param title: The title of the created collection
    :param description: The description of the created collection
    :param keywords: Comma-separated keywords for the resulting collection.
        Surrounding whitespace is ignored; blank and repeated entries are
        dropped.
    :param btc: The Bitcoin address of the resulting collection
    """
    bitmessage_connection = Bitmessage()
    cache = Cache()
    address = bitmessage_connection.create_address(address_password)

    collection_keywords = []
    seen_names = set()
    for raw_name in keywords.split(","):
        name = raw_name.strip()
        if not name or name in seen_names:
            continue
        seen_names.add(name)
        # Reuse the cached keyword when there is one; otherwise create it.
        cached_keyword = cache.get_keyword_by_name(name)
        if cached_keyword is not None:
            collection_keywords.append(cached_keyword)
        else:
            collection_keywords.append(Keyword(name=name))

    collection = Collection(
        title=title,
        description=description,
        address=address,
        accesses=0,
        votes=0,
        btc=btc,
        keywords=collection_keywords,
        documents=[],
        creation_date=datetime.datetime.now(),
        oldest_date=datetime.datetime.now(),
        votes_last_checked=datetime.datetime.now(),
        latest_broadcast_date=datetime.datetime.now()
    )
    cache.insert_new_collection(collection)
    print("Collection inserted with address/ID " + address)
Esempio n. 19
0
    def setUp(self):
        """Reset the cache database and insert four empty collections.

        The collections differ only in title, address and in how many days
        ago their latest broadcast took place.
        """
        cache = Cache()
        cache.reset_database()

        today = datetime.datetime.today
        # (title, address, days since the latest broadcast)
        fixtures = (
            ("First Collection", "bm-first", 0),
            ("Second Collection", "bm-second", 3),
            ("Third Collection", "bm-third", 1),
            ("Fourth Collection", "bm-fourth", 6),
        )

        for title, address, days_ago in fixtures:
            entry = Collection(
                title=title,
                btc="btc",
                address=address,
                description="description",
                keywords=[],
                documents=[],
                latest_broadcast_date=today() -
                datetime.timedelta(days=days_ago),
                creation_date=today(),
                oldest_date=today(),
                latest_btc_tx="",
                oldest_btc_tx="",
                accesses=0,
                votes=0,
                votes_last_checked=today())
            cache.insert_new_collection(entry)
Esempio n. 20
0
def form(language='es'):
    """Show the "add an item" form and process its submission.

    On POST the obverse and reverse images are uploaded through the
    collection's SmartFile client, links to them are requested, and a row
    describing the item is inserted into the Google sheet.

    Returns:
        The rendered ``form.html`` for non-POST requests,
        ``form-success.html`` once the row has been inserted, and
        ``form-error.html`` when an image is missing or no link could be
        obtained for either image.
    """
    site = Site(
        language,
        splitByLanguage(
            'Añadir un Objeto a la Colección | Add an Item to the Collection',
            language))
    collection = Collection(site)

    # Only a submitted form needs processing.
    if request.method != 'POST':
        return render_template('form.html', site=site)

    # ``.get`` so that a submission lacking a file field reaches the error
    # page; indexing a missing key would raise before the check.
    obverse = request.files.get('obverse')
    reverse = request.files.get('reverse')
    if not obverse or not reverse:
        return render_template('form-error.html', site=site)

    itemType = request.form.get('type')
    itemName = request.form.get('name')
    country = request.form.get('country')
    date = request.form.get('date')

    # Builds the folder structure to store the images.
    filePath = '/'.join((
        'Items',
        splitByLanguage(itemType, 'en'),
        splitByLanguage(country, 'en'),
        splitByLanguage(date, 'en'),
    ))

    # Creates the folder if needed.
    collection.smartFileClient.put('/path/oper/mkdir/' + filePath)

    # Generates the file name based on the item name.
    fileName = stringToURL(splitByLanguage(itemName, 'en'))

    # Renames the uploads so they are stored under the item's name.
    obverse.filename = fileName + '-obverse.jpg'
    reverse.filename = fileName + '-reverse.jpg'

    # Uploads the obverse and reverse images.
    collection.smartFileClient.post('/path/data/' + filePath,
                                    file=(obverse.filename, obverse))
    collection.smartFileClient.post('/path/data/' + filePath,
                                    file=(reverse.filename, reverse))

    # Requests the links that will be saved in the Google Spreadsheet.
    obverseURL = collection.smartFileClient.post(
        '/link', path=filePath + '/' + obverse.filename)
    reverseURL = collection.smartFileClient.post(
        '/link', path=filePath + '/' + reverse.filename)

    if 'href' not in obverseURL or 'href' not in reverseURL:
        return render_template('form-error.html', site=site)

    # Insert our data in the Google Spreadsheet.
    insertData = [
        itemType,
        itemName,
        obverseURL['href'] + obverse.filename,
        reverseURL['href'] + reverse.filename,
        country,
        request.form.get('denomination'),
        date,
        request.form.get('diameter'),
        request.form.get('composition'),
        request.form.get('series'),
        request.form.get('serial'),
        request.form.get('grading'),
        request.form.get('value'),
        request.form.get('cost'),
        prepareItemLink(itemType, country, date, itemName),
        request.form.get('mint'),
    ]

    rowCount = len(collection.googleData)

    # Inserted at rowCount + 2: one past the existing data rows, plus one
    # for the header row.
    collection.googleSheet.insert_row(insertData, rowCount + 2)

    return render_template('form-success.html', site=site)
Esempio n. 21
0
    def _cache_collection(self, payload, message):
        """
        Cache the collection described by an FJ_message, creating or updating it.

        Looks the collection up in the cache by ``payload["address"]``. When
        it is not cached, a new ``Collection`` and ``Signature`` are built
        and inserted; otherwise the cached collection and its signature are
        overwritten with the payload values and inserted again. In both
        cases the documents and keywords are rebuilt from the payload, the
        document filenames are hashed and a document download is added to
        ``self.download_threads``.

        NOTE: this method uses Python 2 ``print`` statements.

        :param payload: the contents of the FJ_message; read by the keys
            "address", "title", "description", "btc", "creation_date",
            "oldest_date", "latest_broadcast_date", "votes" and
            "votes_last_checked"
        :param message: the Bitmessage message containing an FJ_message;
            read by the keys "pubkey" and "signature"
        :return: True when the collection was cached, False when the cache
            raised an IntegrityError
        """
        # Documents and keywords are rebuilt from the payload further down,
        # once the collection object exists.
        cached_collection = self.cache.get_collection_with_address(
            payload["address"])

        if cached_collection is None:
            # Not cached yet: build the collection from scratch. Every date
            # arrives as text in the "%A, %d. %B %Y %I:%M%p" format.
            collection_model = Collection(
                title=payload["title"],
                description=payload["description"],
                address=payload["address"],
                btc=payload["btc"],
                creation_date=datetime.datetime.strptime(
                    payload["creation_date"], "%A, %d. %B %Y %I:%M%p"),
                oldest_date=datetime.datetime.strptime(
                    payload["oldest_date"], "%A, %d. %B %Y %I:%M%p"),
                latest_broadcast_date=datetime.datetime.strptime(
                    payload["latest_broadcast_date"], "%A, %d. %B %Y %I:%M%p"),
                votes=payload['votes'],
                votes_last_checked=datetime.datetime.strptime(
                    payload["votes_last_checked"], "%A, %d. %B %Y %I:%M%p"),
            )

            self._build_docs_keywords(payload, collection_model)
            signature = Signature(pubkey=message["pubkey"],
                                  signature=message["signature"],
                                  address=payload["address"])
            try:
                # The signature is stored through the same insert method as
                # the collection.
                self.cache.insert_new_collection(collection_model)
                self.cache.insert_new_collection(signature)
                self._hash_document_filenames(collection_model.documents,
                                              collection_model)
                self.download_threads.add(
                    self._download_documents(collection_model.title,
                                             collection_model.documents))
                print "Cached New Collection"
                return True
            except IntegrityError as m:
                print m.message
                return False
        else:
            # Already cached: clear keywords and documents, then overwrite
            # every field with the payload's values.
            cached_collection.keywords = []
            # NOTE(review): presumably a signature always exists for a
            # cached collection; a None result here would raise -- confirm.
            cached_sig = self.cache.get_signature_by_address(
                payload["address"])
            cached_sig.pubkey = message["pubkey"]
            cached_sig.signature = message["signature"]
            cached_collection.title = payload["title"]
            cached_collection.description = payload["description"]
            cached_collection.address = payload["address"]
            cached_collection.btc = payload["btc"]
            cached_collection.documents = []
            cached_collection.creation_date = datetime.datetime.strptime(
                payload["creation_date"], "%A, %d. %B %Y %I:%M%p")
            cached_collection.oldest_date = datetime.datetime.strptime(
                payload["oldest_date"], "%A, %d. %B %Y %I:%M%p")
            cached_collection.latest_broadcast_date = datetime.datetime.strptime(
                payload["latest_broadcast_date"], "%A, %d. %B %Y %I:%M%p")
            cached_collection.votes = payload['votes']
            cached_collection.votes_last_checked = datetime.datetime.strptime(
                payload["votes_last_checked"], "%A, %d. %B %Y %I:%M%p")
            self._build_docs_keywords(payload, cached_collection)
            try:
                self.cache.insert_new_collection(cached_collection)
                self.cache.insert_new_collection(cached_sig)
                self._hash_document_filenames(cached_collection.documents,
                                              cached_collection)
                self.download_threads.add(
                    self._download_documents(cached_collection.title,
                                             cached_collection.documents))
                print "Cached Updated Collection"
                return True
            except IntegrityError as m:
                print m.message
                return False
    def post(self):
        """Delete the stored tweets of the submitted hashtag.

        Reads ``hashtag_collection`` from the submitted form and removes
        the matching entries through ``Collection.delete_by_hashtag``.

        Returns:
            A redirect to ``manage_db``.
        """
        hashtag = request.form.get('hashtag_collection')

        # Without a submitted hashtag there is nothing to delete; passing
        # None on would leave the match up to ``delete_by_hashtag``.
        if hashtag is not None:
            Collection.delete_by_hashtag(hashtag=hashtag)

        return redirect('manage_db')