def main():
    """Run the threaded crawl starting at ``entering_url`` and log its duration.

    Steps, in order:

    1. Remove ``data.txt`` if it exists.
    2. Build the shared ``KindOfStorage``, ``Counter`` and ``Registry``
       objects, each guarded by its own ``Lock``.
    3. Seed the URL queue with ``[entering_url, 3]``.
    4. Start ``workers_number`` threads, each running the ``run`` method of
       its own ``Parser`` instance, and wait for all of them to finish.
    5. Log the elapsed wall-clock time as a ``timedelta``.
    """
    try:
        os.remove('data.txt')
    except FileNotFoundError:
        # Nothing to clean up: the file simply was not there.
        pass
    except OSError as err:
        # Removal stays best-effort (the crawl still runs), but a real failure
        # such as a permission error is now reported instead of being hidden.
        logging.warning("could not remove data.txt: %s", err)

    # Timezone-aware replacement for the deprecated ``utcnow()``; the logged
    # difference is still a ``timedelta``.
    started_at = datetime.datetime.now(datetime.timezone.utc)

    storage = KindOfStorage(Lock(), Value('i', 0))
    counter = Counter(Lock(), Value('i', 0))
    registry = Registry(Lock(), dict())

    urls_queue = Queue()

    # NOTE(review): presumably ``counter`` tracks outstanding URLs and ``3`` is
    # the remaining crawl depth - confirm against Counter and Parser.
    counter.up()
    urls_queue.put([entering_url, 3])

    parsers = [
        Thread(target=Parser(storage, counter, registry, prefix,
                             workers_number, urls_queue).run)
        for _ in range(workers_number)
    ]

    # Plain loops: these calls are made for their side effects only.
    for worker in parsers:
        worker.start()
    for worker in parsers:
        worker.join()

    logging.info(datetime.datetime.now(datetime.timezone.utc) - started_at)
Beispiel #2
0
class GetKey:
    """Publish key codes read by a background process through a shared value.

    Constructing an instance starts a ``Process`` that runs
    :meth:`GetKeyThread`. Calling the instance returns the current content
    of the shared ``key`` value.
    """

    # Shared values are class attributes, so every instance refers to the same
    # objects. ``key2`` is not used anywhere in this class.
    key = Value('d', 0)
    key2 = Value('d', 0)
    # Reader process handle; set per instance in ``__init__``.
    p = None

    def __init__(self):
        """Create the reader process, store it in ``self.p`` and start it."""
        self.p = Process(target=self.GetKeyThread, args=(self.key,))
        self.p.start()

    def GetKeyThread(self, key):
        """Read key presses into ``key.value`` until code 27 is stored.

        When the code read is 224, a second code is read and added to it.
        NOTE(review): 224 is presumably the extended-key prefix and 27 the
        Escape key - confirm for the target platform.
        """
        extended_prefix, stop_code = 224, 27
        reading = True
        while reading:
            key.value = ord(getch.getch())
            if key.value == extended_prefix:
                key.value += ord(getch.getch())
            reading = key.value != stop_code

    def __call__(self):
        """Return the value currently held in the shared ``key``."""
        return self.key.value
Beispiel #3
0
def parallel_scan(app_config, user_id, media_files):
    """
    Run ``single_scan`` over every path in ``media_files`` using a thread pool.

    Once the app is launched for the first scan (when there is no database) or in order to re-scan,
    analyzing media files will be performed on demand (as authorized user, navigate to /settings and
    click 'Scan Media Files' button).
    To speed up the scan, the files are processed by a pool of 2 worker threads.
    During the scan process files metadata is retrieved from files and is registered in the DB.

    Two shared counters (``passed`` / ``failed``), each with its own lock, are handed to every
    ``single_scan`` call; this function does not read them afterwards.

    :param app_config: a dictionary containing the application configuration settings (=app.config).
    :param user_id: an integer number of user id which will be considered as owner (0 for public).
    :param media_files: a list of strings - absolute paths of media files to be processed.
    :return: True.
    :raises: whatever ``single_scan`` raises; ``starmap`` re-raises it in the caller.
    """
    passed, lock_passed = Value('i', 0), Lock()
    failed, lock_failed = Value('i', 0), Lock()
    args = [(app_config, user_id, path, passed, lock_passed, failed, lock_failed)
            for path in media_files]
    pool = ThreadPool(2)
    try:
        pool.starmap(single_scan, args)
    finally:
        # Always release the worker threads, even when a scan raised;
        # otherwise every failed scan would leak the pool.
        pool.close()
        pool.join()
    return True
Beispiel #4
0

# URL the crawl starts from; main() registers it and passes it to recursive_load().
entering_url = 'https://docs.python.org/3.6/'
# NOTE(review): presumably only URLs beginning with this prefix are followed -
# its use is not visible in this part of the file; confirm.
prefix = 'https://docs.python.org/3.6/'


class FakeLock:
    """Context manager that does nothing; a stand-in where a lock is expected."""

    def __enter__(self):
        """Enter the block without acquiring anything; ``as`` binds ``None``."""
        return None

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Leave the block without releasing anything.

        Returns ``None``, so an exception raised inside the ``with`` body
        is not suppressed.
        """
        return None


# Module-level shared state. Each object receives a FakeLock, whose
# __enter__/__exit__ do nothing, so taking the "lock" has no effect.
# NOTE(review): presumably safe because this variant runs in a single
# asyncio event loop thread - confirm.
storage = KindOfStorage(FakeLock(), Value('i', 0))
counter = Counter(FakeLock(), Value('i', 0))
registry = Registry(FakeLock(), dict())


# Semaphore with an initial value of 15. Where it is acquired is not visible
# here; presumably it caps the number of concurrent downloads - confirm.
semaphore = asyncio.Semaphore(15)


async def main(loop):
    """Crawl from ``entering_url`` over one shared aiohttp session.

    Calls ``counter.up()`` and adds the entry URL to ``registry``, then opens
    an IPv4-only (``AF_INET``) connector and session and awaits
    ``recursive_load`` on the entry URL.

    :param loop: event loop, passed through unchanged to ``recursive_load``.
    """
    counter.up()
    registry.add(entering_url)

    connector = aiohttp.TCPConnector(family=socket.AF_INET)
    # ``async with`` closes the session (and the connector it owns) when the
    # crawl finishes or raises; previously the session was never closed.
    async with aiohttp.ClientSession(connector=connector) as session:
        await recursive_load(entering_url, loop, session)
Beispiel #5
0
 def start(self):
     """Set the shared ``_continue`` flag and submit both worker tasks.

     ``_record_and_send`` and ``_receive_and_play`` are submitted to
     ``self.pool`` with no arguments, then the pool is closed so it accepts
     no further tasks. The async results are not kept.
     """
     self._continue = Value('b', True)
     for task in (self._record_and_send, self._receive_and_play):
         self.pool.apply_async(task, [])
     self.pool.close()
Beispiel #6
0
    limit:          set the number of tweets to retrieve (Increments of 20, default: 100)
    follow_limit:   set the number of following and followers to retrieve (default: 100)
    since:          date selector for tweets (Example: 2017-12-27)
    until:          date selector for tweets (Example: 2017-12-27)
    retweet:        set to 1 to retrieve retweet (default: 0)
    search:         search terms format "i search"
                    for hashtag : (#Hashtag)
                    for multiple : (#Hashtag1 AND|OR #Hashtag2)
                    
    tweet_interact: set to 1 to parse tweet interaction between users (default: 0)
    depth:          search tweet and info from list of follow

TODO: Retrieve tweets tweeted at the user?
"""
# Module-level state shared with search_tweet() through ``global``.
# NOTE(review): ``p`` presumably holds the running search worker - confirm.
p = None
# search_tweet() tests ``not _stop.value``; the flag starts out True.
_stop = Value('b',True)

"""get tweets/follow interaction from user"""
@app.route('/tweets/<string:user>', methods=['GET'])
def user_tweet(user):
    """Hand ``user`` and the request's query arguments to ``get_info_from_user``.

    The work is dispatched through ``.delay`` (presumably a Celery-style
    background task - confirm) and the literal string ``"200"`` is returned
    as the response body.
    """
    query_args = request.args
    request_twint.get_info_from_user.delay(user, query_args)
    return "200"

"""get many tweets from hashtag or search terms"""
@app.route('/tweets/', methods=['GET'])
def search_tweet():

    global p
    global _stop

    if not _stop.value:
Beispiel #7
0
import time
from datetime import date
from multiprocessing.dummy import Value, Process
from threading import Thread
from flask import Flask, request, Response

from TrollHunter.twitter_crawler import crawler
from TrollHunter.twitter_crawler.twint_api import request_twint

# Flask application the route handlers below are registered on.
app = Flask(__name__)
# NOTE(review): ``p`` presumably holds the running search worker - confirm.
p = None
# search_tweet() answers "please stop previous search" while this flag is False.
# NOTE(review): it starts out False, so a search appears to be refused until
# something sets it to True - confirm this is intended.
_stop = Value('b',False)

@app.route('/tweets/<string:user>', methods=['GET'])
def user_tweet(user):
    """Hand ``user`` and the request's query arguments to ``get_info_from_user``.

    The work is dispatched through ``.delay`` (presumably a Celery-style
    background task - confirm) and the literal string ``"200"`` is returned
    as the response body.
    """
    query_args = request.args
    request_twint.get_info_from_user.delay(user, query_args)
    return "200"



@app.route('/tweets/', methods=['GET'])
def search_tweet():

    global p
    global _stop

    if not _stop.value:
        return  " please stop previous search wiht endpoint /stop"
    else:
        _stop.value = False