Esempio n. 1
0
def _analyze_users_similarity(args):
    """Compare one user against a slice of stored users (pool worker).

    ``args`` is a single 5-tuple ``(user, data_set, min_similarity, offset,
    limit)`` so that the function can be dispatched through ``Pool.map``.
    Every pair whose similarity reaches ``min_similarity`` is written through
    ``Repository.add_users_similarity``.
    """
    user, data_set, min_similarity, offset, limit = args
    logger.info("{} {}".format(offset, limit))
    repository = Repository(data_set=data_set)
    progress = Progress(limit - 1)
    candidates = repository.get_users_products(offset=offset, limit=limit - 1)
    for chunk in batch(candidates, 1000):
        for other in chunk:
            # Never compare the reference user with its own record.
            if user['_id'] != other['_id']:
                progress.advance()
                outcome = calculate_products_similarity(user['products'],
                                                        other['products'])
                similarity, common, extra1, extra2 = outcome
                if similarity >= min_similarity:
                    repository.add_users_similarity({
                        'user1_id': user['user_id'],
                        'user2_id': other['user_id'],
                        'similarity': similarity,
                        'common_products': common,
                        'add_products1': extra1,
                        'add_products2': extra2,
                    })

        # One progress line per processed chunk.
        logger.info("{:.1f}% ETA {}".format(progress.get_progress(),
                                            progress.get_estimated_time()))
Esempio n. 2
0
def analyze_orders_similarity_multi(data_set, samples, orders, last_order_id,
                                    user_id):
    """Fan the order-similarity analysis for one user out over a process pool.

    Args:
        data_set: forwarded to ``Repository`` and to every worker task.
        samples: size of the sample range ``[0, samples)``; it is split into
            one slice per worker process.
        orders: value used to size the progress tracker.
        last_order_id: id logged before processing starts.
        user_id: user whose orders are fetched from the repository.
    """
    repository = Repository(data_set=data_set)
    progress = Progress(orders)
    min_similarity = 0.2

    processes = 5
    # ``range`` rejects a zero step, which ``samples == 0`` would produce.
    step = max(1, math.ceil(samples / processes))

    pool = multiprocessing.Pool(processes=processes)
    try:
        logger.info("Last order {}".format(last_order_id))

        user_orders = repository.get_orders_for_user(user_id=user_id)
        # ``order_batch`` avoids shadowing the ``orders`` parameter.
        for order_batch in batch(user_orders, 10):
            tasks = []
            for order in order_batch:
                progress.advance()
                last_order_id = order['_id']
                tasks.extend(
                    (order, data_set, min_similarity, from_sample, step)
                    for from_sample in range(0, samples, step))

            logger.info("Last order {}".format(last_order_id))
            pool.map(_analyze_orders_similarity, tasks)
            logger.info("{:.1f}% ETA {}".format(
                progress.get_progress(), progress.get_estimated_time()))
    finally:
        # Release the worker processes even when a batch fails.
        pool.close()
        pool.join()
Esempio n. 3
0
def most_frequently_bought(data_set, user_id):
    """Log the 20 globally most bought products together with their counts.

    ``user_id`` is accepted but not used by this function.
    """
    repository = Repository(data_set=data_set)
    ranking = list(repository.get_products_bought_globally())
    ranking.sort(key=lambda entry: entry['count'], reverse=True)
    for entry in ranking[:20]:
        product = repository.get_product(entry['_id'])
        # Entries without a matching product are skipped.
        if not product:
            continue
        logger.info("{} {}".format(product['product_name'], entry['count']))
Esempio n. 4
0
def load_products(data_set):
    """Read products from the data set and store them in batches of 100."""
    source = Reader(data_set=data_set)
    store = Repository(data_set=data_set)

    total = 0
    logger.info("Loading products")
    for chunk in batch(source.load_products(), 100):
        store.add_products(chunk)
        total = total + len(chunk)
        # Running total after each stored batch.
        logger.info("Loaded products {}".format(total))
Esempio n. 5
0
def analyze_users_similarity_multi(data_set, samples, user_id):
    """Compare one user against ``samples`` stored users using a process pool.

    The sample range ``[0, samples)`` is split into one slice per worker and
    each slice is handed to ``_analyze_users_similarity``.

    Args:
        data_set: forwarded to ``Repository`` and to every worker task.
        samples: size of the sample range to compare against.
        user_id: user whose products record is used as the reference.
    """
    repository = Repository(data_set=data_set)
    min_similarity = 0.2
    processes = 5
    # ``range`` rejects a zero step, which ``samples == 0`` would produce.
    step = max(1, math.ceil(samples / processes))
    user1 = repository.get_user_products(user_id=user_id)
    tasks = [(user1, data_set, min_similarity, from_sample, step)
             for from_sample in range(0, samples, step)]

    # The pool is created only after the repository lookup succeeded, and is
    # always released, even when a worker raises.
    pool = multiprocessing.Pool(processes=processes)
    try:
        pool.map(_analyze_users_similarity, tasks)
    finally:
        pool.close()
        pool.join()
Esempio n. 6
0
def load_orders(data_set):
    """Load orders in batches of 100, attach their products and store them.

    Orders for which the repository returns no products are left out.
    """
    source = Reader(data_set=data_set)
    store = Repository(data_set=data_set)

    total = 0
    logger.info("Loading orders")
    for chunk in batch(source.load_orders(), 100):
        complete = []
        for order in chunk:
            # Copy each product so the order carries its own objects.
            items = [item.copy()
                     for item in store.find_order_products(order['order_id'])]
            if items:
                order['products'] = items
                complete.append(order)

        if complete:
            store.add_orders(complete)

        total += len(complete)
        logger.info("Loaded orders {}".format(total))
Esempio n. 7
0
def _log_top_products(repository, scores, limit=10):
    """Log name and score of the ``limit`` highest-scoring known products."""
    ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)
    for product_id, score in ranked[:limit]:
        product = repository.get_product(product_id)
        # Ids without a matching product are skipped.
        if product:
            logger.info("{} {}".format(product['product_name'], score))


def most_frequently_bought_by_similar_users(data_set, user_id):
    """Log products shared with, and suggested by, users similar to ``user_id``.

    Each similarity record contributes its ``similarity`` to every product
    that only the *other* user has, and counts once for every common product.
    The ten best entries of each ranking are logged.

    Raises:
        Exception: when a similarity record references ``user_id`` neither as
            ``user1_id`` nor as ``user2_id``.
    """
    repository = Repository(data_set=data_set)
    users = repository.get_similar_users_for_user(user_id=user_id)
    recommend_products = defaultdict(float)
    common_products = defaultdict(float)
    for user in users:
        # Take the product list belonging to the other side of the pair.
        if user['user1_id'] == user_id:
            products = user['add_products2']
        elif user['user2_id'] == user_id:
            products = user['add_products1']
        else:
            raise Exception(
                "similarity record does not reference user {}".format(
                    user_id))

        for p in products:
            recommend_products[p] += user['similarity']

        for p in user['common_products']:
            common_products[p] += 1

    logger.info("-- Most frequent common products:")
    _log_top_products(repository, common_products)

    logger.info("-- Recommended products:")
    _log_top_products(repository, recommend_products)
    def process(self, tweet):
        """Store ``tweet`` and link it to the first matching known place.

        Returns ``None`` when ``Repository.create`` returns ``None`` for the
        tweet; otherwise returns ``Repository.read(tweet.id)``.
        """
        stored_tweet = Repository.create(tweet)

        if stored_tweet is None:
            return None

        username = tweet.user.username
        # Presumably ordered best score first -- confirm against
        # get_potential_places.
        sorted_scores = self.get_potential_places(tweet)
        actual_places = self.places[username]
        actual_communes = self.communes[username]

        # If a candidate is a commune, restrict the place lookup to the
        # places inside that commune; only the first such commune is used.
        for potential_place, _score in sorted_scores:
            if potential_place in actual_communes:
                actual_places = actual_communes[potential_place]
                break

        # Link the tweet to the first candidate that is a known place, then
        # stop looking.
        for potential_place, _score in sorted_scores:
            if potential_place in actual_places:
                self.link_tweet_to_place(tweet,
                                         actual_places[potential_place])
                break

        return Repository.read(tweet.id)
Esempio n. 9
0
    def test_stevens_info(self):
        """Check students, instructors and majors loaded from the 'Stevens' data."""
        stevens = Repository('Stevens')

        students_info = {'10103': ['10103', 'Baldwin, C', 'SFEN', {'SSW 567': 'A', 'SSW 564': 'A-', 'SSW 687': 'B', 'CS 501': 'B'}],
                         '10115': ['10115', 'Wyatt, X', 'SFEN', {'SSW 567': 'A', 'SSW 564': 'B+', 'SSW 687': 'A', 'CS 545': 'A'}],
                         '10172': ['10172', 'Forbes, I', 'SFEN', {'SSW 555': 'A', 'SSW 567': 'A-'}],
                         '10175': ['10175', 'Erickson, D', 'SFEN', {'SSW 567': 'A', 'SSW 564': 'A', 'SSW 687': 'B-'}],
                         '10183': ['10183', 'Chapman, O', 'SFEN', {'SSW 689': 'A'}],
                         '11399': ['11399', 'Cordova, I', 'SYEN', {'SSW 540': 'B'}],
                         '11461': ['11461', 'Wright, U', 'SYEN', {'SYS 800': 'A', 'SYS 750': 'A-', 'SYS 611': 'A'}],
                         '11658': ['11658', 'Kelly, P', 'SYEN', {'SSW 540': 'F'}],
                         '11714': ['11714', 'Morton, A', 'SYEN', {'SYS 611': 'A', 'SYS 645': 'C'}],
                         '11788': ['11788', 'Fuller, E', 'SYEN', {'SSW 540': 'A'}]}

        instructors_info = {'98765': ['98765', 'Einstein, A', 'SFEN', {'SSW 567': 4, 'SSW 540': 3}],
                            '98764': ['98764', 'Feynman, R', 'SFEN', {'SSW 564': 3, 'SSW 687': 3, 'CS 501': 1, 'CS 545': 1}],
                            '98763': ['98763', 'Newton, I', 'SFEN', {'SSW 555': 1, 'SSW 689': 1}],
                            '98762': ['98762', 'Hawking, S', 'SYEN', {}],
                            '98761': ['98761', 'Edison, A', 'SYEN', {}],
                            '98760': ['98760', 'Darwin, C', 'SYEN', {'SYS 800': 1, 'SYS 750': 1, 'SYS 611': 2, 'SYS 645': 1}]}

        majors_info = {'SFEN': ['SFEN', ('SSW 555', 'SSW 564', 'SSW 567', 'SSW 540'), ('CS 513', 'CS 545', 'CS 501')],
                       'SYEN': ['SYEN', ('SYS 800', 'SYS 612', 'SYS 671'), ('SSW 565', 'SSW 810', 'SSW 540')]}

        students_dic = {CWID: person.get_whole_info()
                        for CWID, person in stevens.students.items()}
        instructors_dic = {CWID: person.get_whole_info()
                           for CWID, person in stevens.instructors.items()}
        majors_dic = {major: major_info.get_whole_info()
                      for major, major_info in stevens.majors.items()}

        self.assertEqual(students_dic, students_info)
        self.assertEqual(instructors_dic, instructors_info)

        for item, major in majors_dic.items():
            expected = majors_info[item]
            self.assertEqual(major[0], expected[0])
            # assertTrue(a, b) only checks that ``a`` is truthy (``b`` is the
            # failure message), so compare explicitly.  Sets are used because
            # the container type and order of the course groups are not
            # visible here -- TODO confirm and tighten if order matters.
            self.assertEqual(set(major[1]), set(expected[1]))
            self.assertEqual(set(major[2]), set(expected[2]))
Esempio n. 10
0
    def test_student_courses_info(self):
        """Check completed courses, remaining required courses and electives per student."""
        stevens = Repository('Stevens')
        courses_info = {'10103': [['SSW 567', 'SSW 564', 'SSW 687', 'CS 501'], ['SSW 540', 'SSW 555'], None],
                        '10115': [['SSW 567', 'SSW 564', 'SSW 687', 'CS 545'], ['SSW 540', 'SSW 555'], None],
                        '10172': [['SSW 555', 'SSW 567'], ['SSW 540', 'SSW 564'], ['CS 501', 'CS 513', 'CS 545']],
                        '10175': [['SSW 567', 'SSW 564', 'SSW 687'], ['SSW 540', 'SSW 555'], ['CS 501', 'CS 513', 'CS 545']],
                        '10183': [['SSW 689'], ['SSW 540', 'SSW 555', 'SSW 564', 'SSW 567'], ['CS 501', 'CS 513', 'CS 545']],
                        '11399': [['SSW 540'], ['SYS 612', 'SYS 671', 'SYS 800'], None],
                        '11461': [['SYS 800', 'SYS 750', 'SYS 611'], ['SYS 612', 'SYS 671'], ['SSW 540', 'SSW 565', 'SSW 810']],
                        '11658': [[], ['SYS 612', 'SYS 671', 'SYS 800'], ['SSW 540', 'SSW 565', 'SSW 810']],
                        '11714': [['SYS 611', 'SYS 645'], ['SYS 612', 'SYS 671', 'SYS 800'], ['SSW 540', 'SSW 565', 'SSW 810']],
                        '11788': [['SSW 540'], ['SYS 612', 'SYS 671', 'SYS 800'], None]}

        def normalized(groups):
            # Compare each course group regardless of order and container
            # type; what update_courses_info returns is not visible here --
            # TODO confirm and tighten if order is part of the contract.
            return [None if group is None else sorted(group)
                    for group in groups]

        courses_dic = {
            CWID: stevens.majors[person.major].update_courses_info(person.courses)
            for CWID, person in stevens.students.items()
        }

        # assertTrue(actual, expected) passes for any non-empty dict because
        # its second argument is only the failure message; compare for real.
        self.assertEqual(
            {CWID: normalized(info) for CWID, info in courses_dic.items()},
            {CWID: normalized(info) for CWID, info in courses_info.items()})
Esempio n. 11
0
def analyze_products_by_user(data_set):
    """Build and store, per user, the list of bought products with counts.

    Users are processed in batches of 100; each batch is written with a
    single ``add_user_products`` call.
    """
    repository = Repository(data_set=data_set)
    users = repository.get_users()
    total = len(users)
    done = 0
    for chunk in batch(users, 100):
        summaries = []
        for user_id in chunk:
            bought = repository.get_products_bought_by_user(user_id)
            summaries.append({
                'user_id': user_id,
                'products': [
                    {'product_id': item['_id'], 'count': item['count']}
                    for item in bought
                ],
            })

            done += 1
            logger.info("{}/{}".format(done, total))

        repository.add_user_products(summaries)
Esempio n. 12
0
def analyze_orders_similarity(data_set, samples):
    """Store, for each sampled order, its most similar order among the rest.

    Every order from ``get_orders(limit=samples - 1)`` is compared with the
    orders fetched from position ``offset`` onwards; ``offset`` starts at 1
    and advances by one per outer order, so each pair is visited once.  Only
    the best match with a similarity above 0.2 is stored.

    Args:
        data_set: forwarded to ``Repository``.
        samples: number of orders taking part in the comparison.
    """
    repository = Repository(data_set=data_set)
    # Number of unordered pairs among ``samples`` orders.
    progress = Progress(math.ceil(((samples - 1) * samples) / 2))
    similarity_threshold = 0.2
    offset = 1
    for orders1 in batch(repository.get_orders(limit=samples - 1), 100):
        for o1 in orders1:
            # Best score seen so far for o1; a match must beat the threshold.
            max_similarity = similarity_threshold
            similar = None
            for orders2 in batch(
                    repository.get_orders(offset=offset,
                                          limit=samples - offset), 100):
                for o2 in orders2:
                    progress.advance()
                    similarity, common, additional1, additional2 = calculate_products_similarity(
                        o1['products'], o2['products'])
                    if similarity > max_similarity:
                        max_similarity = similarity
                        similar = dict(order1_id=o1['order_id'],
                                       user1_id=o1['user_id'],
                                       order2_id=o2['order_id'],
                                       user2_id=o2['user_id'],
                                       similarity=similarity,
                                       common_products=common,
                                       add_products1=additional1,
                                       add_products2=additional2)
                        logger.info("Similarity {} {} {}".format(
                            similar['user1_id'], similar['user2_id'],
                            similarity))

            if similar is not None:
                repository.add_orders_similarity(similar)

            offset += 1
            logger.info("{:.1f}% ETA {}".format(progress.get_progress(),
                                                progress.get_estimated_time()))
Esempio n. 13
0
def analyze_users_similarity(data_set, samples):
    """Store, for each sampled user, the most similar user among the rest.

    Each user record from ``get_user_products(limit=samples - 1)`` is compared
    with the records fetched from position ``offset`` onwards; ``offset``
    starts at 1 and grows by one per outer record.  Only the best match with
    a similarity above 0.1 is stored.
    """
    repository = Repository(data_set=data_set)

    pair_count = math.ceil(((samples - 1) * samples) / 2)
    progress = Progress(pair_count)

    offset = 1
    left_source = repository.get_user_products(limit=samples - 1)
    for left_chunk in batch(left_source, 100):
        for left in left_chunk:
            best_score = 0.1
            best_match = None
            right_source = repository.get_user_products(
                offset=offset, limit=samples - offset)
            for right_chunk in batch(right_source, 100):
                for right in right_chunk:
                    progress.advance()
                    outcome = calculate_products_similarity(
                        left['products'], right['products'])
                    similarity, common, extra1, extra2 = outcome
                    if similarity > best_score:
                        best_score = similarity
                        best_match = {
                            'user1_id': left['user_id'],
                            'user2_id': right['user_id'],
                            'similarity': similarity,
                            'common_products': common,
                            'add_products1': extra1,
                            'add_products2': extra2,
                        }
                        logger.info("{} {} {}".format(
                            best_match['user1_id'], best_match['user2_id'],
                            similarity))

            # Persist only when some record beat the initial threshold.
            if best_match is not None:
                repository.add_users_similarity(best_match)

            offset += 1
            logger.info("{:.1f}% ETA {}".format(progress.get_progress(),
                                                progress.get_estimated_time()))
Esempio n. 14
0
 def __init__(self, repo_client=None):
     """Keep the repository client used by this object.

     Args:
         repo_client: client to use; when omitted, a new
             ``Repository(adapter=MongoRepository)`` is created.
     """
     # A default written in the signature is evaluated once, when the
     # function is defined, and would then be shared by every instance.
     if repo_client is None:
         repo_client = Repository(adapter=MongoRepository)
     self.repo_client = repo_client
Esempio n. 15
0
from database import Repository
from locator import PlaceExtractor
from locator import ScoreCalculator

# Print every stored tweet followed by each potential place and its score.
tweets = Repository.all()

for tweet in tweets:
    print(tweet.content, "\n")
    extractor = PlaceExtractor(tweet)
    places = extractor.find_potential_places()
    for place in places:
        calculator = ScoreCalculator(tweet)
        score = calculator.for_word(place)
        print(place, score)

    print("\n\n")
 def test_processor_creates_a_tweet_in_the_database(self):
     """Processing ``self.tweet`` must make it readable under id '6969'."""
     Processor().process(self.tweet)
     persisted = Repository.read('6969')
     assert persisted.id == '6969'
Esempio n. 17
0
File: main.py Progetto: lasida/moksa

def pushToImgBB(visionBase64, timeout=30,
                api_key="a4335073f815a159ee957016a7a2a65c"):
    """Upload an image to the ImgBB API and return the URL it reports.

    Args:
        visionBase64: image payload sent as the ``image`` form field
            (presumably base64-encoded, judging by the name -- confirm).
        timeout: seconds to wait for the server before ``requests`` raises a
            timeout error instead of blocking forever.
        api_key: ImgBB API key sent as the ``key`` query parameter.

    Returns:
        The ``data.url`` value of the JSON response.
    """
    # NOTE(review): the default API key is a secret committed to source; it
    # is kept only for backward compatibility and should be supplied by the
    # caller or by configuration instead.
    response = requests.post(
        "https://api.imgbb.com/1/upload",
        params={'key': api_key},
        data={'image': visionBase64},
        timeout=timeout)
    jsonData = response.json()
    return jsonData['data']['url']


# --> Create the repository object once, when this module is loaded.
db = Repository()
# Result of db.get_all() passed through parse_json.  It is computed here at
# module level, so it is not refreshed per request unless reassigned elsewhere.
stream_db = parse_json(db.get_all())


# Root route: render the index page with the async mode, the registered
# devices, the stream data and the stakeholders.
@app.route('/')
def index():
    """Render ``index.html`` for the root URL."""
    context = {
        'async_mode': socketio.async_mode,
        'devices': devices,
        'stream_db': stream_db,
        'stackholder': stackholder,
    }
    return render_template('index.html', **context)


#---------------------- SOCKET
Esempio n. 18
0
 def missing_major_info(self):
     """Loading 'missed_major_info' (a student's major lacks its info) must raise ValueError."""
     # NOTE(review): the name has no ``test`` prefix, so the default unittest
     # loader will not collect this method -- confirm whether that is intended.
     self.assertRaises(ValueError, Repository, 'missed_major_info')
Esempio n. 19
0
 def missing_person(self):
     """Loading 'grade_person_not_match' (grades.txt names an unknown person) must raise ValueError."""
     # NOTE(review): the name has no ``test`` prefix, so the default unittest
     # loader will not collect this method -- confirm whether that is intended.
     self.assertRaises(ValueError, Repository, 'grade_person_not_match')
Esempio n. 20
0
def analyze_products_totally(data_set):
    """Pass every product's global purchase count to ``set_product_global`` and log it."""
    repository = Repository(data_set=data_set)
    for entry in repository.get_products_bought_globally():
        product_id, bought = entry['_id'], entry['count']
        repository.set_product_global(product_id, bought)
        logger.info("{} {}".format(product_id, bought))
Esempio n. 21
0
 def __init__(self):
     """Load the commune and place lookups from the repository."""
     lookups = Repository.all_users_with_places()
     # The repository returns the pair as (communes, places).
     self.communes, self.places = lookups
Esempio n. 22
0
 def link_tweet_to_place(self, tweet, place):
     """Map ``place`` (by its id) to ``tweet`` and return the repository's result."""
     place_id = place.id
     return Repository.map_place_to_tweet(tweet, place_id)
Esempio n. 23
0
# SERVERNAME = 'http://escoca.ap-1.evennode.com/'

# --> Registered devices (chipID : name)
devices = {'951950972': "ESP-A", '805658940': "ESP-B", '000000000': "ESP-C"}

# --> Registered users (WhatsApp number : name)
stackholder = {'628561655028': "Lasida"}

# --> Set up Flask
app = Flask(__name__)
# NOTE(review): the secret key is hard-coded in source and DEBUG is switched
# on; confirm neither setting reaches a production deployment.
app.config['SECRET_KEY'] = 'secreto!'
app.config["DEBUG"] = True
app.config['JSONIFY_PRETTYPRINT_REGULAR'] = False

# --> Create the repository object used as the database handle
db = Repository()

# Route the root URL and render the index page (registered devices and stakeholders)


@app.route('/')
def index():
    """Render ``index.html`` with the registered devices and stakeholders."""
    context = {'devices': devices, 'stackholder': stackholder}
    return render_template('index.html', **context)


def isset(data, key, typedata="str"):
    if typedata == "str":
        return str(data[key]) if data.get(key) else ""
    else:
Esempio n. 24
0
 def missing_info(self):
     """Loading 'missed_info' (instructors.txt has missing info) must raise ValueError."""
     # NOTE(review): the name has no ``test`` prefix, so the default unittest
     # loader will not collect this method -- confirm whether that is intended.
     self.assertRaises(ValueError, Repository, 'missed_info')