Esempio n. 1
0
    def import_user_item(self, file):
        """Load user ratings from a CSV file into the graph.

        The first line of ``file`` is skipped as a header. Every following
        non-empty row is read as ``userId, movieId, rating, timestamp`` and
        sent to the database as a ``(:User)-[:RATED]->(:Movie)`` merge. The
        query begins with a ``MATCH`` on the movie, so a row whose movie is
        not already in the graph creates nothing.

        Rows are committed in batches of 1000. A row that raises is printed
        together with the reader's current line number and the loop moves on
        to the next row.

        :param file: path of the CSV file to import.
        """
        # Read-only access is all the import needs ('r+' would demand write
        # permission on the data file); newline='' is what the csv module
        # requires so quoted fields containing line breaks are parsed intact.
        with open(file, 'r', newline='') as in_file:
            reader = csv.reader(in_file, delimiter=',')
            next(reader, None)  # skip the header line
            with self._driver.session() as session:
                self.execute_without_exception("CREATE CONSTRAINT ON (u:User) ASSERT u.userId IS UNIQUE")

                # The query text is the same for every row: build it once.
                query = """
                                MATCH (movie:Movie {movieId: $movieId})
                                MERGE (user:User {userId: $userId})
                                MERGE (user)-[:RATED {rating: $rating, timestamp: $timestamp}]->(movie)
                            """
                tx = session.begin_transaction()
                pending = 0  # rows sent since the last commit
                for row in reader:
                    try:
                        if not row:
                            continue
                        user_id = strip(row[0])
                        movie_id = strip(row[1])
                        rating = strip(row[2])
                        timestamp = strip(row[3])
                        tx.run(query, {"movieId": movie_id, "userId": user_id,
                                       "rating": rating, "timestamp": timestamp})
                        pending += 1
                        if pending == 1000:
                            tx.commit()
                            pending = 0
                            tx = session.begin_transaction()
                    except Exception as e:
                        # Best effort: report the offending row and keep going.
                        print(e, row, reader.line_num)
                # Flush whatever is left in the last, partially filled batch.
                tx.commit()
Esempio n. 2
0
    def import_movie_details(self, file):
        """Look up and store details for every movie listed in a CSV file.

        The first line of ``file`` is skipped as a header. For each following
        non-empty row, column 0 is taken as the movie id and column 1 as
        ``imdb_id``; the latter is resolved with ``self._ia.get_movie`` and
        the result is passed to ``self.process_movie_info`` together with the
        open transaction and the movie id.

        The transaction is committed every 10 processed movies and once more
        at the end. A row that raises is printed together with the reader's
        current line number and the loop moves on to the next row.

        :param file: path of the CSV file to import.
        """
        print("Importing details of movies")
        # Read-only access is all the import needs ('r+' would demand write
        # permission on the data file); newline='' is what the csv module
        # requires so quoted fields containing line breaks are parsed intact.
        with open(file, 'r', newline='') as in_file:
            reader = csv.reader(in_file, delimiter=',')
            next(reader, None)  # skip the header line
            with self._driver.session() as session:
                self.execute_without_exception("CREATE CONSTRAINT ON (a:Person) ASSERT a.name IS UNIQUE;")
                tx = session.begin_transaction()
                pending = 0   # movies handled since the last commit
                imported = 0  # movies handled overall
                for row in reader:
                    try:
                        if row:
                            movie_id = strip(row[0])
                            imdb_id = strip(row[1])
                            movie = self._ia.get_movie(imdb_id)
                            self.process_movie_info(movie_info=movie, tx=tx, movie_id=movie_id)
                            pending += 1
                            imported += 1

                        if pending == 10:
                            tx.commit()
                            print(imported, "movie details imported")
                            pending = 0
                            tx = session.begin_transaction()
                    except Exception as e:
                        # Best effort: report the offending row and keep going.
                        print(e, row, reader.line_num)

                # Flush whatever is left in the last, partially filled batch.
                tx.commit()
                print(imported, "lines processed")
Esempio n. 3
0
 def get_movie_info(self):
     """Worker loop: resolve movie details for rows taken from the movie queue.

     Runs forever. Each iteration takes one row from ``self._movie_queue``,
     reads the movie id from column 0 and ``imdb_id`` from column 1, and
     asks ``self._ia.get_movie`` for the movie. On success
     ``[movie_id, movie]`` is put on ``self._writing_queue``. A failed
     attempt is retried after a 10 second pause, up to 10 attempts in
     total; after the last failure the row is reported and dropped.

     All console output is serialized through ``self._print_lock``.
     ``task_done()`` is called exactly once for every row taken from the
     queue, even when handling the row raises.
     """
     while True:
         row = self._movie_queue.get()
         try:
             with self._print_lock:
                 print("Getting info for row: ", row)
             movie_id = strip(row[0])
             imdb_id = strip(row[1])
             # get a movie
             for attempt in range(1, 11):
                 try:
                     movie = self._ia.get_movie(imdb_id)
                     with self._print_lock:
                         print("Writing to the other queue: ", movie)
                     self._writing_queue.put([movie_id, movie])
                     break
                 except Exception:
                     # Exception (not a bare except) so KeyboardInterrupt and
                     # SystemExit are not swallowed by the retry loop.
                     with self._print_lock:
                         print("An error occurred")
                     if attempt == 10:
                         with self._print_lock:
                             print("Error while getting", row)
                     else:
                         with self._print_lock:
                             print("Failed...... ", attempt)
                         time.sleep(10)
         finally:
             # Always balance the get() above; otherwise a malformed row
             # would leave the queue's unfinished-task count stuck and any
             # join() on it would never return.
             self._movie_queue.task_done()
Esempio n. 4
0
    def import_event_data(self, file):
        """Load contextual rating events from a CSV file into the graph.

        The first line of ``file`` is skipped as a header. Every following
        non-empty row is read as
        ``userId, itemId, rating, time, location, companion``. For each row
        the User, Time, Location, Companion and Item nodes are merged and a
        new Event node carrying the rating is created and linked to all five.

        Rows are committed in batches of 1000, with a progress line printed
        after each commit. A row that raises is printed and the loop moves on
        to the next row.

        :param file: path of the CSV file to import.
        """
        with self._driver.session() as session:
            self.execute_without_exception("CREATE CONSTRAINT ON (u:User) ASSERT u.userId IS UNIQUE")
            self.execute_without_exception("CREATE CONSTRAINT ON (i:Item) ASSERT i.itemId IS UNIQUE")
            self.execute_without_exception("CREATE CONSTRAINT ON (t:Time) ASSERT t.value IS UNIQUE")
            self.execute_without_exception("CREATE CONSTRAINT ON (l:Location) ASSERT l.value IS UNIQUE")
            self.execute_without_exception("CREATE CONSTRAINT ON (c:Companion) ASSERT c.value IS UNIQUE")

            processed = 0  # rows sent overall
            # Read-only access is all the import needs ('r+' would demand
            # write permission on the data file); newline='' is what the csv
            # module requires so quoted fields with line breaks parse intact.
            with open(file, 'r', newline='') as in_file:
                reader = csv.reader(in_file, delimiter=',')
                next(reader, None)  # skip the header line
                tx = session.begin_transaction()
                pending = 0  # rows sent since the last commit
                query = """
                        MERGE (user:User {userId: $userId})
                        MERGE (time:Time {value: $time})
                        MERGE (location:Location {value: $location})
                        MERGE (companion:Companion {value: $companion})
                        MERGE (item:Item {itemId: $itemId})
                        CREATE (event:Event {rating:$rating})
                        CREATE (event)-[:EVENT_USER]->(user)
                        CREATE (event)-[:EVENT_ITEM]->(item)
                        CREATE (event)-[:EVENT_LOCATION]->(location)
                        CREATE (event)-[:EVENT_COMPANION]->(companion)
                        CREATE (event)-[:EVENT_TIME]->(time)
                    """

                for row in reader:
                    try:
                        if row:
                            # NOTE(review): the user id is the only column
                            # passed through without strip() - confirm that
                            # is intended.
                            user_id = row[0]
                            item_id = strip(row[1])
                            rating = strip(row[2])
                            # Not called "time": that local would hide the
                            # standard time module inside this method.
                            event_time = strip(row[3])
                            location = strip(row[4])
                            companion = strip(row[5])
                            tx.run(query,
                                   {"userId": user_id, "time": event_time, "location": location,
                                    "companion": companion, "itemId": item_id, "rating": rating})
                            pending += 1
                            processed += 1
                            if pending == 1000:
                                tx.commit()
                                print(processed, "lines processed")
                                pending = 0
                                tx = session.begin_transaction()
                    except Exception as e:
                        # Best effort: report the offending row and keep going.
                        print(e, row)
                # Flush whatever is left in the last, partially filled batch.
                tx.commit()
                print(processed, "lines processed")
            # Same total reported once more after the file has been closed.
            print(processed, "lines processed")
Esempio n. 5
0
    def import_user_item(self, file):
        """Load purchase events from a CSV file into the graph.

        The first line of ``file`` is skipped as a header. Every following
        non-empty row is read as ``timestamp, userId, eventType, itemId``.
        Only rows whose event type equals ``"transaction"`` are imported:
        the Item and User nodes are merged and a ``PURCHASES`` relationship
        carrying the timestamp is merged between them.

        Imported rows are committed in batches of 1000, with a progress line
        printed after each commit. A row that raises is printed together with
        the reader's current line number and the loop moves on.

        :param file: path of the CSV file to import.
        """
        # Read-only access is all the import needs ('r+' would demand write
        # permission on the data file); newline='' is what the csv module
        # requires so quoted fields containing line breaks are parsed intact.
        with open(file, 'r', newline='') as in_file:
            reader = csv.reader(in_file, delimiter=',')
            next(reader, None)  # skip the header line
            with self._driver.session() as session:
                self.execute_without_exception(
                    "CREATE CONSTRAINT ON (u:User) ASSERT u.userId IS UNIQUE")
                self.execute_without_exception(
                    "CREATE CONSTRAINT ON (u:Item) ASSERT u.itemId IS UNIQUE")

                tx = session.begin_transaction()
                pending = 0    # purchases sent since the last commit
                processed = 0  # purchases sent overall
                query = """
                    MERGE (item:Item {itemId: $itemId})
                    MERGE (user:User {userId: $userId})
                    MERGE (user)-[:PURCHASES { timestamp: $timestamp}]->(item)
                """
                for row in reader:
                    try:
                        if not row:
                            continue
                        timestamp = strip(row[0])
                        user_id = strip(row[1])
                        event_type = strip(row[2])
                        item_id = strip(row[3])

                        # Every other event type is ignored by this import.
                        if event_type != "transaction":
                            continue
                        tx.run(
                            query, {
                                "itemId": item_id,
                                "userId": user_id,
                                "timestamp": timestamp
                            })
                        pending += 1
                        processed += 1
                        if pending == 1000:
                            tx.commit()
                            print(processed, "lines processed")
                            pending = 0
                            tx = session.begin_transaction()
                    except Exception as e:
                        # Best effort: report the offending row and keep going.
                        print(e, row, reader.line_num)
                # Flush whatever is left in the last, partially filled batch.
                tx.commit()
                print(processed, "lines processed")
Esempio n. 6
0
    def import_movies(self, file):
        """Load movies and their genres from a CSV file into the graph.

        The first line of ``file`` is skipped as a header. Every following
        non-empty row is read as ``movieId, title, genres`` where ``genres``
        is a ``|``-separated list. A Movie node is created for the row, each
        genre is merged as a Genre node, and a ``HAS`` relationship is merged
        from the movie to every genre.

        Rows are committed in batches of 1000, with a progress line printed
        after each commit. A row that raises is printed together with the
        reader's current line number and the loop moves on to the next row.

        :param file: path of the CSV file to import.
        """
        print("import movies")
        # Read-only access is all the import needs ('r+' would demand write
        # permission on the data file); newline='' is what the csv module
        # requires so quoted fields containing line breaks are parsed intact.
        with open(file, 'r', newline='') as in_file:
            reader = csv.reader(in_file, delimiter=',')
            next(reader, None)  # skip the header line
            with self._driver.session() as session:
                self.execute_without_exception("CREATE CONSTRAINT ON (a:Movie) ASSERT a.movieId IS UNIQUE; ")
                self.execute_without_exception("CREATE CONSTRAINT ON (a:Genre) ASSERT a.genre IS UNIQUE; ")

                # The query text is the same for every row: build it once.
                query = """
                                CREATE (movie:Movie {movieId: $movieId, title: $title})
                                with movie
                                UNWIND $genres as genre
                                MERGE (g:Genre {genre: genre})
                                MERGE (movie)-[:HAS]->(g)
                            """
                tx = session.begin_transaction()

                pending = 0    # movies sent since the last commit
                processed = 0  # movies sent overall
                for row in reader:
                    try:
                        if row:
                            movie_id = strip(row[0])
                            title = strip(row[1])
                            genres = strip(row[2])
                            tx.run(query, {"movieId": movie_id, "title": title, "genres": genres.split("|")})
                            pending += 1
                            processed += 1

                        if pending == 1000:
                            tx.commit()
                            print(processed, "movies processed")
                            pending = 0
                            tx = session.begin_transaction()

                    except Exception as e:
                        # Best effort: report the offending row and keep going.
                        print(e, row, reader.line_num)

                # Flush whatever is left in the last, partially filled batch.
                tx.commit()
                print(processed, "lines processed")
Esempio n. 7
0
    def import_session_data(self, file):
        """Stream click data from a CSV file into the graph, one click per row.

        The file is read with pandas in chunks of one million rows, with the
        columns named ``sessionID, timestamp, itemID, category`` and the
        timestamp column parsed as dates. For every row the Session and Item
        nodes are merged, a Click node holding the timestamp (as a string)
        is created, and the click is linked to both.

        A transaction is opened per chunk, committed every 10000 rows and
        again at the end of the chunk; a progress line follows each commit.
        A row that raises is printed and the loop moves on to the next row.

        :param file: path of the CSV file to import.
        """
        with self._driver.session() as session:
            self.execute_without_exception("CREATE CONSTRAINT ON (s:Session) ASSERT s.sessionId IS UNIQUE")
            self.execute_without_exception("CREATE CONSTRAINT ON (i:Item) ASSERT i.itemId IS UNIQUE")

            column_types = {"sessionID": np.int64, "itemID": np.int64, "category": object}
            click_query = """
                        MERGE (session:Session {sessionId: $sessionId})
                        MERGE (item:Item {itemId: $itemId, category: $category})
                        CREATE (click:Click {timestamp: $timestamp})
                        CREATE (session)-[:CONTAINS]->(click)
                        CREATE (click)-[:IS_RELATED_TO]->(item)
                    """
            total_rows = 0
            chunk_reader = pd.read_csv(
                file,
                header=0,
                dtype=column_types,
                names=['sessionID', 'timestamp', 'itemID', 'category'],
                parse_dates=['timestamp'],
                chunksize=10 ** 6,
            )
            for frame in chunk_reader:
                tx = session.begin_transaction()
                uncommitted = 0
                for record in frame.itertuples():
                    try:
                        clicked_at = record.timestamp
                        session_id = record.sessionID
                        category = strip(record.category)
                        item_id = record.itemID
                        parameters = {
                            "sessionId": session_id,
                            "itemId": item_id,
                            "timestamp": str(clicked_at),
                            "category": category,
                        }
                        tx.run(click_query, parameters)
                        uncommitted += 1
                        total_rows += 1
                        if uncommitted == 10000:
                            tx.commit()
                            print(total_rows, "lines processed")
                            uncommitted = 0
                            tx = session.begin_transaction()
                    except Exception as e:
                        # Report the failing row and continue with the next.
                        print(e, record)
                # Commit the remainder of this chunk before reading the next.
                tx.commit()
                print(total_rows, "lines processed")
            print(total_rows, "lines processed")
Esempio n. 8
0
    def import_session_data(self, file):
        """Group clicks by session in memory, then ingest the sessions.

        The file is read with pandas in chunks of one million rows, with the
        columns named ``sessionID, timestamp, itemID, category`` and the
        timestamp column parsed as dates. Each click is collected under its
        session id as ``(item id, category, timestamp)``, the timestamp
        being converted with ``time.mktime``.

        Sessions with fewer than 5 clicks are dropped. The clicks of every
        remaining session are sorted by timestamp and turned into dicts with
        the keys ``itemId``, ``category`` and ``timestamp``.

        Each session is then written with a single query that creates the
        Session node and, per click, merges the Item and creates a Click
        linked to both. Sessions are committed in batches of 2000; a session
        that raises is printed and the loop moves on to the next one.

        :param file: path of the CSV file to import.
        :return: dict mapping each kept session id to its time-ordered list
            of click dicts.
        """
        column_types = {"sessionID": np.int64, "itemID": np.int64, "category": object}
        rows_read = 0
        sess_clicks = {}
        chunk_reader = pd.read_csv(
            file,
            header=0,
            dtype=column_types,
            names=['sessionID', 'timestamp', 'itemID', 'category'],
            parse_dates=['timestamp'],
            chunksize=10 ** 6,
        )
        for frame in chunk_reader:
            for record in frame.itertuples():
                clicked_at = time.mktime(record.timestamp.timetuple())
                session_id = record.sessionID
                category = strip(record.category)
                item_id = record.itemID

                rows_read += 1
                sess_clicks.setdefault(session_id, []).append((item_id, category, clicked_at))

            print(rows_read, "lines processed")

        print(rows_read, "lines processed")
        print("total number of sessions", len(sess_clicks))

        # Keep only sessions with at least 5 clicks and order each session's
        # clicks by timestamp (tuple position 2) while converting to dicts.
        sess_clicks = {
            session_id: [
                {'itemId': item_id, 'category': category, 'timestamp': clicked_at}
                for item_id, category, clicked_at
                in sorted(clicks, key=operator.itemgetter(2))
            ]
            for session_id, clicks in sess_clicks.items()
            if len(clicks) >= 5
        }

        print("total number of valid sessions", len(sess_clicks))
        print("start db ingestion")

        with self._driver.session() as session:
            self.executeNoException(session, "CREATE CONSTRAINT ON (s:Session) ASSERT s.sessionId IS UNIQUE")
            self.executeNoException(session, "CREATE CONSTRAINT ON (i:Item) ASSERT i.itemId IS UNIQUE")

            ingest_query = """
                CREATE (session:Session {sessionId: $sessionId})
                WITH session
                UNWIND $items as entry
                MERGE (item:Item {itemId: entry.itemId, category: entry.category})
                CREATE (click:Click {timestamp: entry.timestamp})
                CREATE (click)-[:IS_RELATED_TO]->(item)
                CREATE (session)-[:CONTAINS]->(click)
            """
            tx = session.begin_transaction()
            uncommitted = 0
            sessions_sent = 0
            for session_id, items in sess_clicks.items():
                try:
                    tx.run(ingest_query, {"sessionId": session_id, "items": items})
                    uncommitted += 1
                    sessions_sent += 1
                    if uncommitted == 2000:
                        tx.commit()
                        print(sessions_sent, "lines processed")
                        uncommitted = 0
                        tx = session.begin_transaction()
                except Exception as e:
                    # Report the failing session and continue with the next.
                    print(e, session_id)
            # Commit the last, partially filled batch if one is still open.
            try:
                if session.has_transaction():
                    tx.commit()
            except Exception as e:
                print(e)
            print(sessions_sent, "sessions created processed")
        return sess_clicks