コード例 #1
0
def option3_top_n_pos_common_phrases(business_ids):
    limit = int(input("Please enter the number of n: "))
    list_of_common_phrase = list()
    for business_id in business_ids:
        review = Review("", "", "", "", "", "", "", "", "", "")
        # geit all positive reviews for one business_id
        positive_reviews = review.get_all_reviews_by_business_id_and_sentiment(
            business_id.business_id, 'pos')

        list_of_words = list()
        word_dictionary = dict()
        #split review text into a list of words
        for each_review in positive_reviews:
            words = split_text(each_review.text)
            list_of_words.extend(words)
        #construction of common phrases dictionary for positive reviews
        word_dictionary = dictionary_construction(list_of_words,
                                                  word_dictionary)
        # sort the dictionary by value, turn it into a tuple
        sort_word_dictionary = sorted(word_dictionary.items(),
                                      key=lambda x: x[1],
                                      reverse=True)
        index = 0
        # print result
        for word in sort_word_dictionary:
            if index >= limit:
                break
            print word[0], word[1]
            index += 1
コード例 #2
0
def option6_neg_examples(business_ids):

    for business_id in business_ids:
        review = Review("", "", "", "", "", "", "", "", "", "")
        negative_reviews = review.get_reviews_by_business_id_and_sentiment(
            business_id.business_id, 'neg', 5)
    index = 1
    for negative_review in negative_reviews:
        print str(index) + ".", negative_review.text.rstrip()
        index += 1
コード例 #3
0
def option5_pos_examples(business_ids):

    for business_id in business_ids:
        review = Review("", "", "", "", "", "", "", "", "", "")
        positive_reviews = review.get_reviews_by_business_id_and_sentiment(
            business_id.business_id, 'pos', 5)
    index = 1
    for positive_review in positive_reviews:
        print str(index) + ".", positive_review.text.rstrip(
        )  # adjust the format
        index += 1
コード例 #4
0
ファイル: Book.py プロジェクト: inderjot29/AmazonBook
 def getReviewList(self, asin):
     reviewsResult = []
     totalPage=0
     baseUrl = 'http://www.amazon.com/product-reviews/'
     html = MyHtml.getHtml(baseUrl + asin, ffhead=True)
     pageNumberList = html.xpath(".//ul[@class='a-pagination']//li")
     if len(pageNumberList)>0:
         countOfListItems=len(pageNumberList)
     
         indexOfPageTotal=countOfListItems-2
         listitem=pageNumberList[indexOfPageTotal]
         totalPage=int(listitem.xpath('.//a')[0].text.strip())
     else: 
         totalPage=1
     sortBy = 'recent'
     pageNumber = 1
     foundKnownReview = False
     
     while pageNumber <= totalPage:
         url = baseUrl + asin + \
             '?pageNumber={}&sortBy={}'.format(str(pageNumber), sortBy)
    
         html = MyHtml.getHtml(url, ffhead=True)
         isCount=html.xpath('.//div[@id="cm_cr-product_info"]/div/div[1]/div[2]/span')
         if isCount is not None and len(isCount)>0:
             countOfReviews=int(isCount[0].text.strip())
         else:
             countofReviews=0
         print countOfReviews,"count of reviews"
         if countOfReviews>0:
             divWholeReviewList = html.xpath('.//div[@id="cm_cr-review_list"]')[0]
             divReviewList = divWholeReviewList.xpath('./div[@id]')
             for divReview in divReviewList:
                 reviewID = divReview.attrib['id']
                 if (reviewID in reviewsResult) or (reviewID in self.reviewList):
                     foundKnownReview = True
                     break
                 aReview = Review.Review()
                 aReview.reviewID = reviewID
                 Review.saveReview(review=aReview)
                 reviewsResult.append(reviewID)
             # end of for
             pageNumber += 1
             if foundKnownReview:
                 break
         else:
             pageNumber+=1
         # end of while
     return reviewsResult[::-1]
コード例 #5
0
ファイル: Book.py プロジェクト: inderjot29/AmazonBook
 def printReviews(self, ct=None):
     fout = ''
     flag = False
     if ct is None:
         flag = 1
         if not os.path.exists("../data/" + self.fetchDate.isoformat() +
                               "/review/"):
             os.makedirs(
                 "../data/" + self.fetchDate.isoformat() + "/review/")
         ct = CommonTool()
         if not os.path.exists("../data/" + self.fetchDate.isoformat() +
                               "/review/review.txt"):
             fout = open(
                 "../data/" + self.fetchDate.isoformat() +
                 "/review/review.txt", "w")
             print "writing reviews in new file"
             ct.setFout(fout)
             ct.writeln(Review.Review.tableHead)
         else:
             fout = open(
                 "../data/" + self.fetchDate.isoformat() +
                 "/review/review.txt", "a")
             ct.setFout(fout)
     for reviewID in self.reviewList:
         print "printing reviews"
         review = Review.loadReview(reviewID, self.fetchDate)
         review.printData(ct)
         review.insertReviewDataIntoTable()
     if flag:
         fout.close()
コード例 #6
0
ファイル: MetodeBayar.py プロジェクト: jspmarc/uap
def CekBayar(ListReview, GameName):
    """Simulate the payment-code email flow for a game purchase.

    Shows the user a fake confirmation email containing a random payment
    code, then prompts until the user either enters a valid code (the
    game review flow is started and the loop ends) or enters "2" to
    cancel (returns None).
    """
    codes = [
        "XXYY12", "BQQPR1", "MMR20K", "MMR900", "MMR1DI", "MMRR69", "69A420"
    ]
    while True:

        print(
            "\n>>>Misalkan user mendapatkan email yang berisi kode pembayarannya<<<\n"
        )
        print(">>>Di dalam email user:<<<\n")
        print("Thank you for purchase, congratulations on your new game.")
        print("We hope you can enjoy the game to its fulless.")
        # random.choice is the idiomatic form of the original
        # codes[random.randint(0, len(codes) - 1)] indexing
        print("This is your payment code:", random.choice(codes))
        print("\n>>>Akhir dari email user<<<\n")

        time.sleep(5)
        print("Please check your email for your payment code.")
        code = input("\nPlease enter the payment code (enter 2 to cancel): ")
        code = code.upper()
        if code in codes:
            Review.PendapatGame(ListReview, GameName)
            break
        elif code == "2":
            return None
        else:
            print("The payment code is wrong.")
コード例 #7
0
ファイル: Book.py プロジェクト: inderjot29/AmazonBook
 def getReviews(self):
     print 'getReviews'
     if self.numOfReviews > 0:
         if self.reviewList is None:
             self.reviewList = self.getReviewList(self.asin)
             
         else:
             for review in self.getReviewList(self.asin):
                 if review not in self.reviewList:
                     self.reviewList.append(review)
     else:
         self.reviewList = []
     print self.reviewList
     self.calcReviewTopPercent()
     Review.getReviews(self.asin, numOfReviews=self.numOfReviews,
                       bookPublishDate=self.publishDate)
コード例 #8
0
    def parse_metadata_and_vocabulary(self, file, dir):
        """Parse a raw review dump, writing metadata rows and the vocabulary.

        Each record in *file* spans several "key: value" lines and is
        terminated by a blank line; on that blank line the accumulated
        fields are turned into a Review, written to the metadata file,
        and its words are collected for the vocabulary.

        file -- path of the ISO-8859-1 encoded review dump.
        dir  -- output directory for the metadata and vocabulary files.
        """
        words_list = list()
        words_indexes_list = list()

        with open(file, 'r', encoding='ISO-8859-1') as data:
            # iterate the file lazily instead of materialising readlines()
            for line in data:
                # raw strings avoid invalid-escape warnings in the patterns
                if re.match(r'^product', line):
                    prod = line.split(': ')[1].strip('\n')
                elif re.match(r'^review/helpfulness', line):
                    helpfulness = line.split(': ')[1].strip('\n')
                elif re.match(r'^review/score', line):
                    score = line.split(': ')[1].strip('\n')
                elif re.match(r'^review/text', line):
                    text = line.split(': ')[1].strip('\n')

                elif re.match(r'^\s*$', line):
                    # blank line ends one record: build and persist it
                    product = p.Product(prod)
                    review = r.Review(product.get_product_id(), helpfulness,
                                      score, text)

                    # write review to the metadata file
                    self.write_to_metadata(review, dir)

                    words_list.extend(review.get_text())

        print('Created Metadata file')
        words_list = self.remove_duplicates(words_list)
        # append words and indexes to the lists
        wordlist_asa_string = self.append_to_wordlonglist(
            words_list, words_indexes_list)
        # write data to vocabulary
        self.write_to_vocabulary(wordlist_asa_string, words_indexes_list, dir)
コード例 #9
0
def books(id):
    """Flask view for a single book page, looked up by ISBN.

    GET renders the page with the book's details and its reviews.
    POST adds a review for the logged-in user unless that user has
    already reviewed this book.  Any exception (typically the missing
    "email" session key when nobody is logged in) falls through to the
    login page.

    id -- the book's ISBN (also used as the Review foreign key).
    """
    try:
        # raises KeyError when no user is logged in -> handled below
        user = session["email"]
        result = db.session.query(Books).filter(Books.isbn == id).first()
        data=Review.query.all()
        r=Review.query.filter_by(isbn=id).all()
        if request.method=='POST':
            reviewdata=Review(id,user,request.form['comment'],request.form['rating'])
            # NOTE(review): `user` is rebound here from the email string
            # to a Review row (or None); the template receives whichever
            # value it ends up holding
            user = Review.query.filter_by(email=user,isbn=id).first()
            data=Review.query.all()
            if user is not None:
                # one review per user per book
                print("User had already given rating.")
                var1 = "Error: User had already given rating."
                return render_template("Book_Page.html", user = user,Book_details=result,var1 = var1,comments=r, allreviewdata = data )
            db.session.add(reviewdata)
            db.session.commit()
            var1="Review submitted"
            flash(var1)
            
            return redirect(url_for('books', id = id))

        else:   
            return render_template("Book_Page.html", user = user,Book_details=result,comments=r, allreviewdata = data )
  
    except Exception as e:
        # broad catch: any failure is treated as "not logged in"
        print(e)
        var1 = "You must log in to view the homepage"
        return render_template("reg.html",var1 = var1)
コード例 #10
0
ファイル: main.py プロジェクト: compsy/dynamic-state-tracker
 def review(self):
     """Ask the user to pick a saved session file and open it for review."""
     chosen, _ = QFileDialog.getOpenFileName(
         self, self.MultiLang.find_correct_word("Open File"), "saves",
         "All Files (*);;Python Files (*.py)")
     # an empty string means the dialog was cancelled
     if not chosen:
         return
     print(chosen)
     window = Review.ReviewWindow(self, chosen)
     window.show()
コード例 #11
0
def make_review_list(path):
    """Bucket the reviews parsed from *path* by their 1-5 star rating.

    Returns a list of five lists; index i holds the Review objects
    whose overall rating is i + 1.
    """
    # one bucket per star rating (comprehension replaces the original
    # append-in-a-loop initialisation)
    reviews = [[] for _ in range(5)]
    for curr in parse(path):
        review = Review.Review(curr['reviewText'], curr['overall'])
        reviews[review.get_overall() - 1].append(review)
    return reviews
コード例 #12
0
def guess_review(text):
    """Classify *text* by majority vote among its n nearest neighbours."""
    global review_points_list, n
    # lazily load the reference data on first use
    if not review_points_list[0]:
        load_tuple_data()
    neighbors = find_nearest_neighbors(
        Review.Query(text).get_points(), review_points_list, n)
    return get_most_occurring(neighbors)
コード例 #13
0
def getReviews():
    """Load every review document from MongoDB into the global reviewList."""
    db = getConnection()
    collection = db.reviews
    for review in collection.find():
        # wrap the raw document in a Review object before storing it
        # (the original also kept an unused uniqueId counter; removed)
        reviewList.append(rv.Review(review))
コード例 #14
0
    def jsonToObject(self, rev):
        """Convert one review JSON dict into a Review and store it.

        rev -- dict with 'product', 'date', 'email', 'rating' and
               'reviewText' keys.
        """
        product = rev["product"]
        date = rev["date"]
        email = rev["email"]
        rating = rev["rating"]
        text = rev["reviewText"]
        # BUG FIX: the original passed the undefined name `reviewText`
        # (NameError at runtime); the parsed value is held in `text`.
        revObj = Review.Review(product, date, email, rating, text)
        self.reviewList.append(revObj)
コード例 #15
0
ファイル: Book.py プロジェクト: inderjot29/AmazonBook
 def solveReviewSummary(self):
     """Record whether this book's page has a review-quotes table.

     Sets self.hasQuoteTable to 1/0.  When the table exists, each
     quoted review is loaded and re-saved.  NOTE(review): the stderr
     "not found" message below runs unconditionally because the
     try/except around loadReview is commented out -- this looks like a
     debugging leftover; confirm intent before changing.
     """
     quotesTable = self.html.xpath(".//table[@id='quotesTable']")
     if len(quotesTable) > 0:
         self.hasQuoteTable = 1
         quotes = quotesTable[0].xpath(
             "./td/a[@class='a-link-normal a-text-normal a-color-base']")
         for quote in quotes:
             # the review ID is the 5th path segment of the quote's href
             words = quote.attrib["href"].split("/")
             # print words
             reviewID = words[4]
             # print reviewID
             #try:
             review = Review.loadReview(reviewID, self.fetchDate)
             #review.setQuoteTable(1)
             Review.saveReview(review)
             #except :
             sys.stderr.write(
                 'quotesTable review not found: {0} {1}\n'.format(
                     self.asin, reviewID))
     else:
         self.hasQuoteTable = 0
コード例 #16
0
def submitreview():
    """JSON API endpoint: add a review for a book and return its reviews.

    Expects a JSON body with 'rating', 'comment' and 'email', and the
    target book's ISBN as the 'isbn' query parameter.  Rejects a second
    review by the same email for the same ISBN (409).  On success,
    returns the book's details together with all of its reviews.
    """
    if not request.is_json:
        message = "Invalid request format"
        return jsonify(message), 400
    isbn = request.args.get('isbn')
    try:
        result = db.session.query(Books).filter(Books.isbn == isbn).first()
    except Exception:
        # best-effort DB error handling, as in the original bare except
        message = "Please Try again Later"
        return jsonify(message), 500
    if result is None:
        message = "Please enter valid ISBN"
        return jsonify(message), 404
    # parse the JSON body once instead of once per field
    payload = request.get_json()
    rating = payload['rating']
    comment = payload['comment']
    email = payload['email']
    user = Review.query.filter_by(email=email, isbn=isbn).first()
    if user is not None:
        message = "Sorry you can't review this book again"
        return jsonify(message), 409
    reviewdata = Review(isbn, email, comment, rating)
    try:
        db.session.add(reviewdata)
        db.session.commit()
    except Exception:
        message = "Please Try Again "
        return jsonify(message), 500
    try:
        result = db.session.query(Books).filter(Books.isbn == isbn).first()
        r = Review.query.filter_by(isbn=isbn).all()
    except Exception:
        message = "Please Try again Later"
        return jsonify(message), 500
    print(result)
    if result is None:
        message = "No book found"
        return jsonify(message), 404
    # assemble the response payload
    reviews = []
    for review in r:
        reviews.append({
            "email": review.email,
            "rating": review.rating,
            "comment": review.comment,
        })
    response = {
        'isbn': result.isbn,
        'title': result.title,
        'author': result.author,
        'year': result.year,
        'reviews': reviews,
    }
    return jsonify(response), 200
コード例 #17
0
def parse_reviews_HTML(reviews, data):
	"""Extract interview-review fields from parsed HTML and append Review objects.

	reviews -- iterable of BeautifulSoup elements, one per interview review.
	data    -- list that receives one Review per parsed element; returned.

	Fields missing from a review default to "-".  NOTE(review): question
	texts are utf-8 encoded bytes while the other fields stay str --
	inconsistent, but preserved here.
	"""
	for review in reviews:
		# defaults for fields that may be absent from a review
		length = "-"
		gotOffer = "-"
		experience = "-"
		difficulty = "-"
		date = review.find("time", { "class" : "date" }).getText().strip()
		role = review.find("span", { "class" : "reviewer"}).getText().strip()
		# the three outcome columns: offer / experience / difficulty
		outcomes = review.find_all("div", { "class" : ["tightLt", "col"] })
		if (len(outcomes) > 0):
			gotOffer = outcomes[0].find("span", { "class" : "middle"}).getText().strip()
		#endif
		if (len(outcomes) > 1):
			experience = outcomes[1].find("span", { "class" : "middle"}).getText().strip()
		#endif
		if (len(outcomes) > 2):
			difficulty = outcomes[2].find("span", { "class" : "middle"}).getText().strip()
		#endif
		appDetails = review.find("p", { "class" : "applicationDetails"})
		if (appDetails):
			appDetails = appDetails.getText().strip()
			# the process length appears as "... took <length>." in the text
			tookFormat = appDetails.find("took ")
			if (tookFormat >= 0):
				start = appDetails.find("took ") + 5
				length = appDetails[start :].split('.', 1)[0]
			#endif
		else:
			appDetails = "-"
		#endif
		details = review.find("p", { "class" : "interviewDetails"})
		if (details):
			s = details.find("span", { "class" : ["link", "moreLink"] })
			if (s):
				s.extract() # Remove the "Show More" text and link if it exists
			#endif
			details = details.getText().strip()
		#endif
		questions = []
		qs = review.find_all("span", { "class" : "interviewQuestion"})
		if (qs):
			for q in qs:
				s = q.find("span", { "class" : ["link", "moreLink"] })
				if (s):
					s.extract() # Remove the "Show More" text and link if it exists
				#endif
				questions.append(q.getText().encode('utf-8').strip())
			#endfor
		#endif
		r = Review.Review(date, role, gotOffer, experience, difficulty, length, details, questions)
		data.append(r)
	#endfor
	return data
コード例 #18
0
ファイル: Book.py プロジェクト: inderjot29/AmazonBook
def findReviewRank(asin, reviewID, fetchDate=None):
    """Return the chronological rank of *reviewID* within the book for *asin*.

    fetchDate defaults to today's date, evaluated at call time.
    BUG FIX: the original used fetchDate=date.today() as the default,
    which is evaluated once at import and freezes the date for the
    process lifetime.  Falls back to Review.getReviewIndex when the
    pickled book file cannot be opened.
    """
    if fetchDate is None:
        fetchDate = date.today()
    try:
        path = getPath(fetchDate)
        # with-block guarantees the pickle file is closed
        with open(path + asin, "rb") as fin:
            book = pickle.load(fin)
            return book.reviewList.index(reviewID)
    except IOError:
        # no local pickle for this date: ask the Review module instead
        return Review.getReviewIndex(asin, reviewID)
コード例 #19
0
def option4_top_n_neg_common_phrases(business_ids):
    limit = int(input("Please enter the number of n: "))
    list_of_common_phrase = list()
    for business_id in business_ids:
        review = Review("", "", "", "", "", "", "", "", "", "")
        negative_reviews = review.get_all_reviews_by_business_id_and_sentiment(
            business_id.business_id, 'neg')
        list_of_words = list()
        word_dictionary = dict()
        for each_review in negative_reviews:
            words = split_text(each_review.text)
            list_of_words.extend(words)
        word_dictionary = dictionary_construction(list_of_words,
                                                  word_dictionary)
        sort_word_dictionary = sorted(word_dictionary.items(),
                                      key=lambda x: x[1],
                                      reverse=True)
        index = 0
        for word in sort_word_dictionary:
            if index >= limit:
                break
            print word[0], word[1]
            index += 1
コード例 #20
0
ファイル: Book.py プロジェクト: inderjot29/AmazonBook
    def calcReviewTopPercent(self):
        """Annotate each review with its chronological rank and top-N% flags.

        Reviews in the first 1%/5%/10% of self.reviewList (by position)
        get the corresponding topXPercent attribute set to 1, then each
        review is saved back.  Exits the process if assigning timeRank
        raises AttributeError (Python 2 `except ..., e` syntax).
        """
        print 'calcReviewTopPercent'
        import math

        maxRank = len(self.reviewList)
        # ceil so a non-empty list always has at least one top-percent slot
        top1Percent = int(math.ceil(maxRank / 100.0))
        top5Percent = int(math.ceil(maxRank / 20.0))
        top10Percent = int(math.ceil(maxRank / 10.0))

        for rank, reviewID in enumerate(self.reviewList):
            aReview = Review.loadReview(reviewID)
            try:
                aReview.timeRank = rank
            except AttributeError, e:
                sys.stderr.write(str(e) + '\n')
                sys.stderr.write('reviewID' + reviewID)
                sys.exit(-1)
            # flags are mutually exclusive: only the tightest bracket is set
            if rank <= top1Percent:
                aReview.top1Percent = 1
            elif rank <= top5Percent:
                aReview.top5Percent = 1
            elif rank <= top10Percent:
                aReview.top10Percent = 1
            Review.saveReview(aReview)
コード例 #21
0
def parse(file, dir):
    """Parse a raw review dump and build metadata, index and binary files.

    Records in *file* are groups of "key: value" lines terminated by a
    blank line.  For every record a Review is written to the metadata
    file, its words feed the vocabulary/index, and (word id, doc id)
    pairs are collected for the two binary posting files.

    file -- path of the review dump.
    dir  -- output directory for all generated files.
    """
    words_list = list()
    words_indexes_list = list()
    wordid_docid = list()

    with open(file, 'r') as data:
        # iterate the file lazily instead of materialising readlines()
        for line in data:
            # raw strings avoid invalid-escape warnings in the patterns
            if re.match(r'^product', line):
                prod = line.split(': ')[1].strip('\n')
            elif re.match(r'^review/helpfulness', line):
                helpfulness = line.split(': ')[1].strip('\n')
            elif re.match(r'^review/score', line):
                score = line.split(': ')[1].strip('\n')
            elif re.match(r'^review/text', line):
                text = line.split(': ')[1].strip('\n')

            elif re.match(r'^\s*$', line):
                # blank line ends one record: build and persist it
                product = p.Product(prod)
                review = r.Review(product.get_product_id(), helpfulness, score,
                                  text)

                # delete doubles and add to wordlist.
                words_list.extend(review.get_text())

                # write review to the metadata file
                write_to_metadata(review, dir)

                # set tuples with the words and their docids
                wordid_docid.extend(make_word_docid_tuples(review))

    print('Created Metadata file')
    words_list = remove_duplicates(words_list)
    # append words and indexes to the lists
    wordlist_asa_string = append_to_wordlonglist(words_list,
                                                 words_indexes_list)
    # write words and indexes to the index file
    write_to_index_file(wordlist_asa_string, words_indexes_list, dir)
    # replace the words with word ids
    wordid_docid = make_wordid_docid_tuples(wordlist_asa_string,
                                            words_indexes_list, wordid_docid)

    # create the binary files:
    create_word_to_docs_binary_file(wordid_docid, dir)
    create_doc_to_words_binary_file(wordid_docid, dir)
コード例 #22
0
    def parse_metadata_and_vocabulary(self, file, dir):
        """Parse a review dump, writing per-review csv metadata and the vocabulary.

        Each record in *file* spans several "key: value" lines and is
        terminated by a blank line; on that blank line the accumulated
        fields are turned into a Review, written as a csv metadata row,
        and its (deduplicated) words are collected for the vocabulary.

        file -- path of the ISO-8859-1 encoded review dump.
        dir  -- output directory for the metadata and vocabulary files.
        """
        words_list = list()
        words_indexes_list = list()

        # with-block guarantees the csv file is closed even on error
        # (the original relied on an explicit close that an exception
        # could skip)
        with open(dir + 'reviews_metadata.csv', 'w', newline='') as metadata:
            writer = csv.writer(metadata,
                                delimiter=' ',
                                quotechar='|',
                                quoting=csv.QUOTE_MINIMAL)

            with open(file, 'r', encoding='ISO-8859-1') as data:
                # iterate the file lazily instead of materialising readlines()
                for line in data:
                    # raw strings avoid invalid-escape pattern warnings
                    if re.match(r'^product', line):
                        prod = line.split(': ')[1].strip('\n')
                    elif re.match(r'^review/helpfulness', line):
                        helpfulness = line.split(': ')[1].strip('\n')
                    elif re.match(r'^review/score', line):
                        score = line.split(': ')[1].strip('\n')
                    elif re.match(r'^review/text', line):
                        text = line.split(': ')[1].strip('\n')

                    elif re.match(r'^\s*$', line):
                        # blank line ends one record: build and persist it
                        product = p.Product(prod)
                        review = r.Review(product.get_product_id(), helpfulness,
                                          score, text)

                        # write review to the metadata file
                        writer.writerow([
                            review.id, review.product_id, review.helpfulness,
                            review.score, review.num_of_words
                        ])

                        words_list.extend(review.get_text_without_doubles())

        print('Created Metadata file')

        words_list = self.remove_duplicates(words_list)
        # append words and indexes to the lists
        wordlist_asa_string = self.append_to_wordlonglist(
            words_list, words_indexes_list)
        # write data to vocabulary
        self.write_to_vocabulary(wordlist_asa_string, words_indexes_list, dir)
コード例 #23
0
ファイル: UserFunction.py プロジェクト: bhaktij24/gitupload
    def __init__(self, idRole):
        """Interactive review menu loop for a logged-in user.

        idRole -- sequence whose first element is the user's id
        (produced by the login module).
        """
        #fetch userid from login module
        self.Userid = idRole[0]

        while True:

            #Takes user's input
            userAction = input(
                "1. Add Review/Ratings\n2. View reviews/ratings of a movie\n")

            # NOTE(review): membership in string.digits is only True for
            # a SINGLE digit character, so multi-digit input falls
            # through to the "special characters" branch below
            if userAction in string.digits:

                if userAction == "1":

                    # fetch movie reviews for the user and allow the user to input/update reviews
                    self.checkReview(self.Userid)
                    moreActions = input("Do you still want to continue?[y]: ")
                    if moreActions.lower() == "y":
                        continue
                    else:
                        break

                elif userAction == "2":

                    # fetch movie reviews in visually attracted format(graph or tabular forms)
                    review = Review.Review()
                    moreActions = input("Do you still want to continue?[y]: ")
                    if moreActions.lower() == "y":
                        continue
                    else:
                        break

                #Invalid user input (a single digit other than 1 or 2)
                else:
                    print(
                        "No such action available! Please login again to enter the correct input."
                    )
                    break

            #condition to verify special characters
            else:
                print("Special characters or alphabets are not for input.")
                continue
コード例 #24
0
ファイル: BookList.py プロジェクト: inderjot29/AmazonBook
 def solveReview(self):
     """Dump every known review to this fetch date's review.txt.

     Writes the table header, then each review's data row.  Newly-seen
     reviews additionally register their reviewer in self.reviewerList
     and are marked as no longer new.
     """
     with open("../data/" + self.fetchDate.isoformat() + "/review.txt", "w")\
             as fout:
         ct = CommonTool(fout)
         fout.write(Review.Review.tableHead)
         fout.write('\n')
         fout.flush()
         # ct.writeln(Review.Review.tableHead)
         for i, reviewID in enumerate(self.reviewList):
             # skip placeholder entries
             if reviewID == '':
                 continue
             print 'solve review {0} of {1}: {2}'.format(
                 i, len(self.reviewList), reviewID)
             review = Review.loadReview(reviewID)
             review.printData(ct)
             if review.isNew:
                 print 'isNewReview'
                 if review.reviewerID not in self.reviewerList:
                     self.reviewerList.append(review.reviewerID)
                 review.isNew = False
コード例 #25
0
def createUserReview():
    """Handle the user-review form: validate, persist to shelve, redirect.

    On a valid POST, the review is stored in GoFit.db under the
    'Review' key (keyed by user id) and the user is redirected to their
    profile; otherwise the form is re-rendered.
    """
    create_user_form = CreateUserReview(request.form)
    if request.method == 'POST' and create_user_form.validate():
        # context manager guarantees the shelve db is closed even if an
        # exception occurs (the original's explicit close could be skipped)
        with shelve.open('GoFit.db', 'c') as db:
            try:
                review_dict = db['Review']
            except KeyError:
                # narrow catch: only the missing-key case, not all errors
                review_dict = {}
                print("Error in retrieving Users from Gofit.db.")
            review = Review.userReview(session['user_id'],
                                       session['first_name'],
                                       session['last_name'],
                                       review=create_user_form.review.data)

            review_dict[review.get_user_id()] = review
            db['Review'] = review_dict

        return redirect(url_for('to_profile'))
    return render_template("userReview.html", form=create_user_form)
コード例 #26
0
def create_user_rev():
    """Handle the staff review form: validate, persist to shelve, redirect.

    On a valid POST, the review is stored in GoFit.db under the
    'Review' key (keyed by user id) and the user is redirected to the
    review page; otherwise the form is re-rendered.
    """
    create_user_form = CreateReview(request.form)
    if request.method == 'POST' and create_user_form.validate():
        # context manager guarantees the shelve db is closed even if an
        # exception occurs (the original's explicit close could be skipped)
        with shelve.open('GoFit.db', 'c') as db:
            try:
                staff_dict = db['Review']
            except KeyError:
                # narrow catch: only the missing-key case, not all errors
                staff_dict = {}
                print("Error in retrieving Users from Gofit.db.")
            staff = Review.userReview(first_name=create_user_form.first_name.data,
                                      last_name=create_user_form.last_name.data,
                                      review=create_user_form.review.data)

            staff_dict[staff.get_user_id()] = staff
            db['Review'] = staff_dict

        return redirect(url_for('review'))
    return render_template('createReview.html', form=create_user_form)
コード例 #27
0
def main():
    """Build the sentiment, review_stats and common_phrases tables.

    Pulls 50 reviews from the database, classifies each with TextBlob's
    NaiveBayesAnalyzer and stores the result in the sentiment table,
    then refreshes review_stats and, per business, the common_phrases
    table.
    """
    review = Review("", "", "", "", "", "", "", "", "",
                    "")  # this will call your constructor
    # get 50 results from databases
    reviews = review.get_reviews("50")
    for a_review in reviews:
        # construction of sentiment table
        blob = TextBlob(a_review.text, analyzer=NaiveBayesAnalyzer())
        text_sentiment = blob.sentiment

        text_sentiment = text_sentiment[
            SENTIMENT_TYPE]  # text_sentiment will either be pos (for positive) or neg (for negative)
        # here is where we create a Sentiment object
        sentiment = Sentiment(a_review.review_id, a_review.business_id,
                              text_sentiment)
        sentiment.insert(
        )  # this will insert information into the sentiment table

    # construction of review_stats table
    review_stats = Review_stats("", "", "", "", "")
    review_stats.insert(
    )  # insert positive and negative reviews' information to review_stats table

    # construction of common_phrases table
    business = Business("", "", "", "", "", "", "", "", "", "", "")
    business_ids = business.get_all_business_ids(
    )  # acquire all business_ids from sentiment table
    # NOTE(review): Reviews/words/list_of_words are re-assigned or unused
    # inside the loop below; only word_dictionary is carried across it
    Reviews = list()
    words = list()
    list_of_words = list()
    word_dictionary = dict()
    for business_id in business_ids:
        review = Review("", "", "", "", "", "", business_id.business_id, "",
                        "", "")
        Reviews = review.get_reviews_by_business_id(
        )  # get all reviews by business_id
        insert_words(business_id.business_id, Reviews,
                     word_dictionary)  # insert data into common_phrases table
コード例 #28
0
def movie_extractor(directory_path, aspects, set_of_movies, k):
    """Instantiate Movie objects from per-review xml files and score aspects.

    Given the directory that contains xml files - each representing a
    single review - this function instantiates movie objects.  At the
    end, the score of each aspect belonging to the movie is computed.

    Args:
        directory_path (str): the directory where the single-review
        xml files are kept.

        aspects (dict): maps each aspect to its KL relevance number.

        set_of_movies (list): receives the instantiated movie objects.

        k (int): the number of chosen main aspects (top k aspects).

    Returns:
        None.
    """
    for dirpath, dirnames, files in os.walk(directory_path):

        # skip the directory whose name ends in "iews" (the reviews root)
        if dirpath[-4:] != "iews":

            # the last 4 chars of the directory name identify the movie
            new_movie = Movie.Movie(dirpath[-4:])

            # one Review per xml file in this movie's directory
            for file in files:
                file_name = os.path.join(dirpath, file)
                new_review = Review.Review(file_name)
                new_review.review_extractor(aspects)
                new_movie.reviews.append(new_review)
                new_movie.number_of_reviews += 1

            for review in new_movie.reviews:
                # counter of aspect occurrences within the current review
                aspects_in_review = review.occurrences_of_each_aspect

                for current_aspect in aspects_in_review:
                    current_aspect_count = aspects_in_review[current_aspect]
                    aspect_KL_rel = aspects[current_aspect]
                    review_sent = review.average_sentiment

                    # raw score: occurrences x KL relevance x sentiment
                    current_aspect_score = current_aspect_count * aspect_KL_rel * review_sent

                    if current_aspect == "film":
                        # debug trace for the "film" aspect, appended per review
                        with open("debug_score" + new_movie.xml + ".txt",
                                  'a+',
                                  encoding="utf-8") as f:
                            print(current_aspect, file=f)
                            print("current_aspect_count: ",
                                  current_aspect_count,
                                  file=f)
                            print("aspect_KL_rel: ", aspect_KL_rel, file=f)
                            print("review_sent: ", review_sent, file=f)
                            print("current_aspect_score: ",
                                  current_aspect_score,
                                  file=f)
                            print("new_movie.number_of_reviews: ",
                                  new_movie.number_of_reviews,
                                  file=f)
                            print("------------", file=f)

                    # average the contribution over the movie's review count
                    current_aspect_score = current_aspect_score / new_movie.number_of_reviews

                    # accumulate directly in the dict; the original did an
                    # O(n) membership test on list(keys()) before adding
                    new_movie.aspects_score[current_aspect] = \
                        new_movie.aspects_score.get(current_aspect, 0) + current_aspect_score

            new_movie.top_k_aspects_evaluation(k)
            print(new_movie.aspects_score["film"])
            set_of_movies.append(new_movie)
コード例 #29
0
ファイル: test.py プロジェクト: clemsonedx/userReviewAnalysis
 def setUpClass(cls):
     """Load the first data row of excel.csv into a shared Review fixture.

     NOTE(review): the file handle is intentionally kept open so the
     reader stays usable across tests.
     """
     cls.reader = csv.DictReader(open('excel.csv'))
     # the next() builtin works on Python 2.6+ and 3; the original
     # reader.next() method call is Python-2 only
     cls.review = Review(next(cls.reader))
コード例 #30
0
def main():
    """Review-helpfulness pipeline.

    Stages:
      1. Cleaning      — read data.txt, strip stop words / punctuation,
                         build a Collection of (review, helpfulness) pairs.
      2. Feature sel.  — document frequencies, tf-idf, relevant-word set.
      3. Train/predict — per-review relevant-word score vs. helpfulness,
                         fed to linear regression and SVR.
      4. Extra         — (unfinished) two-feature variant buffers.

    Relies on module-level helpers (ExtractHelpful, ExtractReview,
    CleanStopWord, RemovePunct, Review, Collection, LinearRegPredictions,
    SVRPrediction) and on numpy / PriorityQueue imports.
    """

    # ---------------------------------------------------------------
    #
    #                          Cleaning
    #
    # ---------------------------------------------------------------

    # opening the file and reading it; `with` guarantees the handle is
    # closed even if parsing raises (the original never closed it)
    with open("data.txt", "r") as f:
        contents = f.readlines()
    nbOfDoc = 0
    # NOTE(review): duplicates in this tuple ('or', 'd', 'on', 'have')
    # are harmless — it is only used for membership tests downstream.
    stopWords = ('i', 'the', 'a', 'an', 'to', 'it', 'as', 'and', 'is', 'does',
                 'not', 'was', 'so', 'than', 'of', 'for', 'my', 'you', 'we',
                 'they', 'this', 'that', 'with', 'are', 'were', 'your',
                 'their', 'no', 'yes', 'or', 'them', 'did', 'had', 'will',
                 'may', 'mine', '', 's', 've', 'd', 'can', 'on', 'up', 'down',
                 'but', 'or', 'me', 'out', ',', 'if', 'by', "don't", "i've",
                 're', 'be', 'in', 'd', 'have', 'all', 'got', 'go', 'much',
                 '.', 'on', 'one', 'should', 'have', 'these')

    collection = Collection()
    # reading all lines one by one
    for line in contents:

        # xy contains the 2 helpfulness scores
        xy = ExtractHelpful(line)

        # skip reviews with zero total votes (helpfulness is undefined)
        if xy[1] != "0":
            score = int(xy[0]) / int(xy[1])

            # review is a list of the strings contained in the original review
            # but the strings are only separated according to white spaces
            # in the original review (see split function)
            # therefore we need to clean the strings of the punctuations symbols
            # and of the stop words
            rawReview = ExtractReview(line.lower())
            cleanedReview = CleanStopWord(rawReview, stopWords)
            # NOTE(review): RemovePunct is applied twice in the original,
            # presumably because stripping punctuation can expose new
            # stop-word / punctuation-only tokens — confirm one pass is
            # not sufficient before simplifying.
            cleanedReview = RemovePunct(cleanedReview, stopWords)
            cleanedReview = RemovePunct(cleanedReview, stopWords)

            # collection is a list of perfectly cleaned
            # reviews and their scores.
            review = Review(cleanedReview, score)
            # `!= 0` replaces the original `is not 0`: identity comparison
            # with an int literal only works by accident of CPython's
            # small-int caching and is a SyntaxWarning on Python 3.8+
            if review.nbOfWords != 0:
                collection.AddReview(review)

            nbOfDoc = nbOfDoc + 1

    # ---------------------------------------------------------------
    #
    #                   Feature Selection
    #
    # ---------------------------------------------------------------

    # we create a dictionary dft that will contain all the words encountered in
    # any document. The words will be the key and be paired with the number of
    # documents that contain them

    dft = collection.SetDFT()

    print("\n dft : ")
    print(dft)

    tfidf = collection.SetTFIDF()

    print("\n tfidf : ")
    print(tfidf)

    relWordsScores = collection.SetRelevantWords(0.5)

    print("\n relevant words and scores : ")
    print(relWordsScores)

    relWords = collection.relWords

    print("\n relevant words only : ")
    print(relWords)

    # we now have all our sorted relevant words stocked in relWords

    # ---------------------------------------------------------------
    #
    #                   Training & Predictions
    #
    # ---------------------------------------------------------------

    allReviews = collection.GetListOfReviews()
    trainingColl = Collection()
    testColl = Collection()
    i = 1

    # we create two collections, one for training, one for test:
    # every 10th review goes to the test set (90/10 split)
    for review in allReviews:
        if i % 10 == 0:
            testColl.AddReview(review)
        else:
            trainingColl.AddReview(review)
        i += 1

    print("\n test and train set are done")

    trainSorter = PriorityQueue()
    testSorter = PriorityQueue()

    # xTrain will be used for the training of the regressions
    xTrain = np.ndarray((1, trainingColl.nbOfReviews))
    xTest = np.ndarray((1, testColl.nbOfReviews))

    # xTrainList and yTrain will be used for plots. yTrain will also be used for
    # training purposes
    xTrainList = []
    yTrain = []
    xTestList = []
    yTest = []

    # for every review we compute a score based on the sum of the tfidf scores
    # of the relevant words divided by the number of relevant words, and we
    # associate it with the relevance score of the review. We put this tuple in
    # the priority queue to sort them by review score. We do this for both
    # the training and test sample

    for review in trainingColl.listOfReviews:
        nbOfRelWords = 0
        reviewClass = review.GetScore()
        reviewScore = 0
        for word in review.GetSetOfWords():
            if word in collection.relWords:
                nbOfRelWords += 1
                reviewScore += tfidf[word]
        # guard against division by zero when no relevant word occurred
        if reviewScore != 0:
            reviewScore /= nbOfRelWords
        trainSorter.put((reviewScore, reviewClass))

    print("all training reviews are treated")

    for review in testColl.listOfReviews:
        nbOfRelWords = 0
        reviewClass = review.GetScore()
        reviewScore = 0
        for word in review.GetSetOfWords():
            if word in collection.relWords:
                nbOfRelWords += 1
                reviewScore += tfidf[word]
        if reviewScore != 0:
            reviewScore /= nbOfRelWords
        testSorter.put((reviewScore, reviewClass))

    # moved out of the loop: the original printed this message once per
    # test review, while the matching training message prints only once
    print("all testing reviews are treated")

    # drain the priority queues so x/y come out sorted by review score
    i = 0
    while not trainSorter.empty():

        info = trainSorter.get()
        xTrain[0][i] = info[0]
        xTrainList.append(info[0])
        yTrain.append(info[1])
        i += 1

    print("x and y built")

    j = 0
    while not testSorter.empty():
        info = testSorter.get()
        xTest[0][j] = info[0]
        xTestList.append(info[0])
        yTest.append(info[1])
        j += 1

    # reshape row vectors (1, n) into the (n, 1) column shape sklearn-style
    # regressors expect
    xTrain2 = np.reshape(xTrain, (-1, 1))
    xTest2 = np.reshape(xTest, (-1, 1))

    testLinRegPred = LinearRegPredictions(xTrain2, xTrainList, yTrain, xTest2,
                                          xTestList, yTest)

    testSvrPred = SVRPrediction(xTrain2, xTrainList, yTrain, xTest2, xTestList,
                                yTest)

    # ---------------------------------------------------------------
    #
    #                            Extra
    #
    # ---------------------------------------------------------------

    # xTrainExtra will be used for the training of the regressions
    # it will contain the average relevant word scores and the number of words
    # of the reviews of the training set
    # NOTE(review): this section allocates its buffers but is never
    # completed — the function ends here.

    xTrainExtra = np.ndarray((2, trainingColl.nbOfReviews))
    xTestExtra = np.ndarray((2, testColl.nbOfReviews))

    xTrainList = []
    yTrain = []
    xTestList = []
    yTest = []
def main():
    """CLI entry point: parse flags and run the requested pipeline stages
    (selection, counting, merging, debug collection, plotting, feature
    extraction, homology filtering, classification).

    Each stage is driven by its own JSON config file under config/;
    passing --test switches selection/counting/merging to the
    config/Test/ variants.  NOTE(review): stages presumably consume the
    outputs of earlier ones, so the order of the `if` blocks below is
    significant — confirm before reordering.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-s', '--select', action='store_true')
    parser.add_argument('-t', '--test', action='store_true')
    parser.add_argument('-c', '--count', action='store_true')
    parser.add_argument('-m', '--merge', '--combine', action='store_true')
    parser.add_argument('-e', '--extract', action='store_true')
    parser.add_argument('-d', '--debuginput', action='store_true')
    parser.add_argument('-r', '--review', action='store_true')
    parser.add_argument('--replace-debug', action='store_true')
    parser.add_argument('-p', '--plot-data', action='store_true')
    parser.add_argument('--base-data', action='store_true')
    parser.add_argument('--features', default='original')
    parser.add_argument('-f', '--homology-filter', action='store_true')
    parser.add_argument('-y', '--classify', action='store_true')
    parser.add_argument('--grid-search', action='store_true')
    parser.add_argument('--plot', action='store_true')
    parser.add_argument('--fit', action='store_true')
    parser.add_argument('--count-total-number-of-genes', action='store_true')

    args = parser.parse_args()

    if args.select:
        if args.test:
            selector = Selector("config/Test/selection_config.json")
        else:
            selector = Selector("config/selection_config.json")
        selector.select()
        selector.selected_to_folder()

    if args.count:
        if args.test:
            counter = Counter("config/Test/counter_config.json")
        else:
            counter = Counter("config/counter_config.json")
        counter.count_all_viruses()

    if args.merge:
        if args.test:
            combiner = Combiner("config/Test/combiner_config.json")
        else:
            combiner = Combiner("config/combiner_config.json")
        combiner.combine_all_viruses()

    if args.debuginput:
        debug_input_collector = DebugInfoCollector("config/debug_info_collector_config.json")
        # --replace-debug makes the collector overwrite existing debug output
        if args.replace_debug:
            debug_input_collector.collect(True)
        else:
            debug_input_collector.collect()

    if args.review:
        # deferred import: the Review module is only loaded when requested
        import Review
        Review.run()

    if args.plot_data:
        data_plotter = DataPlotter("config/data_plotter_config.json")
        data_plotter.plot()

    if args.base_data:
        base_data = BaseData("config/base_data_config.json")
        base_data.create_data()

    if args.homology_filter:
        homology_filter = HomologyFilter('config/homology_filter.json')
        homology_filter.filter()

    if args.extract:
        feature_extractor = FeatureExtraction("config/feature_extraction_config.json")
        # --features selects the feature set name (default 'original')
        feature_extractor.extract(args.features)

    if args.count_total_number_of_genes:
        combiner = Combiner("config/combiner_config.json")
        combiner.print_number_of_genes()

    if args.classify:
        if args.grid_search:
            # XGBoost hyper-parameter grids; commented alternatives are
            # kept for manual experimentation.
            MLgrid = [
                {
                    "booster": ["gblinear"],
                    # "lambda": [0, 0.0001, 0.001],
                    "lambda": [0],
                    # "updater": ["shotgun", "coord_descent"],
                    "updater": ["coord_descent", "shotgun"],
                    # "feature_selector": ["cyclic", "shuffle", "random", "greedy", "thrifty"]
                    "feature_selector": ["shuffle"]
                }
                # {
                #     "booster": ["gbtree"],
                #     # "max_depth": range(3, 10, 2),
                #     # "min_child_weight": range(1, 6, 2)
                # }
            ]
            # NOTE(review): _1vsAgrid and RRgrid are built but never passed
            # to grid_search below — presumably kept for manually switching
            # the search target; verify before deleting.
            _1vsAgrid = [
                {
                    "estimator__booster": ["gblinear"],
                    "estimator__lambda": [0.1],
                    "estimator__updater": ["coord_descent"],
                    "estimator__feature_selector": ["shuffle"]
                },
                # {
                #     "estimator__booster": ["gbtree"],
                #     "estimator__max_depth": range(3, 10, 2),
                #     "estimator__min_child_weight": range(1, 6, 2)
                # }

            ]
            RRgrid = [
                {
                    "estimator__booster": ["gblinear"],
                    "estimator__lambda": [0.1],
                    "estimator__updater": ["coord_descent"],
                    "estimator__feature_selector": ["shuffle"]
                },
                # {
                #     "estimator__booster": ["gbtree"]
                # #     "estimator__max_depth": range(3, 10, 2),
                # #     "estimator__min_child_weight": range(1, 6, 2)
                # }

            ]
            classification = Classification('config/classification_config.json', args.features)
            classification.grid_search('ML', 'XGBoost', MLgrid, 200, 'no-pca')
        else:
            if args.fit:
                classification = Classification('config/classification_config.json', args.features)
                classification.fit_all()

            if args.plot:
                cp = ClassificationPlotter('config/classification_config.json', args.features)
                cp.plot_all()
コード例 #32
0
 def pushReviewbtnClicked(self):
     """Slot for the push-review button: open the Review dialog modally."""
     review_dialog = Review()
     review_dialog.exec_()
コード例 #33
0
ファイル: BookList.py プロジェクト: inderjot29/AmazonBook
 def printData(self):
     """Dump every review in self.reviewList, joined with its book and
     reviewer records, as one tab-separated row per review into
     ../data/<fetchDate>/dataAll.txt (header row from self.tableHeadList).

     Column order is fixed by the sequence of ct.write calls below and
     must match self.tableHeadList.  NOTE(review): Python-2-only code
     (print statement syntax).
     """
     with open("../data/" + self.fetchDate.isoformat() + "/dataAll.txt",
               "w") as fout:
         ct = CommonTool(fout)
         # header row
         fout.write("\t".join(self.tableHeadList))
         fout.write('\n')
         fout.flush()
         for i, reviewID in enumerate(self.reviewList):
             # skip blank IDs left in the list
             if reviewID == '':
                 continue
             print 'solve review {0} of {1}: {2}'.format(
                 i, len(self.reviewList), reviewID)
             # load the three joined records for this row
             review = Review.loadReview(reviewID)
             book = Book.loadBookByAsin(review.asin)
             reviewer = Reviewer.loadReviewer(review.reviewerID)
             # --- identifiers ---
             ct.write(reviewID)
             ct.write(review.asin)
             ct.write(review.reviewerID)
             # --- reviewer ranking badges ---
             ct.write(reviewer.rName)
             ct.write(reviewer.tRev1)
             ct.write(reviewer.tRev10)
             ct.write(reviewer.tRev50)
             ct.write(reviewer.tRev100)
             ct.write(reviewer.tRev500)
             ct.write(reviewer.tRev1000)
             ct.write(reviewer.tRevHall)
             ct.write(reviewer.vVoice)
             # --- review fields ---
             ct.write(review.verified)
             ct.write(review.rate)
             ct.write(review.title)
             ct.write(review.date)
             ct.write(review.fetchDate)
             ct.write(review.reviewBookDate)
             ct.write(review.elapsedDate)
             ct.write(review.helpful)
             ct.write(review.total)
             ct.write(review.helpfulness)
             ct.write(review.helpfulRank)
             ct.write(review.timeRank)
             #ct.write(review.top1Percent)
             #ct.write(review.top5Percent)
             #ct.write(review.top10Percent)
             ct.write(review.description)
             ct.write(review.numOfComments)
             ct.write(review.comment)
             #ct.write(review.isQuoteTable)
             ct.write(review.lastReviewRank)
             # --- book fields ---
             ct.write(book.url)
             ct.write(book.tag)
             ct.write(book.allowPreview)
             ct.write(book.binding)
             ct.write(book.publishDate)
             ct.write(book.author)
             ct.write(book.authorInfo)
             ct.write(book.rate)
             ct.write(book.numOfReviews)
             ct.write(book.kindlePrice)
             ct.write(book.hardcoverPrice)
             ct.write(book.paperbackPrice)
             ct.write(book.bookDsc)
             ct.write(book.listPrice)
             ct.write(book.pages)
             ct.write(book.isbn10)
             ct.write(book.isbn13)
             ct.write(book.subrank)
             ct.write(book.hasEditorialReview)
             ct.write(book.editorialReview)
             #ct.write(book.hasQuoteTable)
             # --- reviewer profile fields ---
             ct.write(reviewer.email)
             ct.write(reviewer.webPage)
             ct.write(reviewer.hasPhoto)
             ct.write(reviewer.rNum)
             ct.write(reviewer.helpRate)
             ct.write(reviewer.hVote)
             ct.write(reviewer.tVote)
             ct.write(reviewer.avgRate)
             ct.write(reviewer.fRevTime)
             ct.write(reviewer.lRevTime)
             ct.write(reviewer.duration)
             # "N/A" profile fields are encoded as binary presence flags
             if reviewer.rReal == "N/A":
                 ct.write(0)
             else:
                 ct.write(1)
             if reviewer.location == "N/A":
                 ct.write(0)
             else:
                 ct.write(1)
             if reviewer.aboutMe == "N/A":
                 ct.write(0)
             else:
                 ct.write(1)
             if reviewer.interest == "N/A":
                 ct.write(0)
             else:
                 ct.write(1)
             ct.write(review.fromFormat)
             # if review.fromFormat == "Hardcover":
             #     ct.write(0)
             # elif review.fromFormat == "Paperback":
             #     ct.write(1)
             # else:
             #     ct.write(2)
             # reviewer rank: "N/A" collapses to 0, otherwise the raw rank
             if reviewer.rRank == "N/A":
                 ct.write(0)
             else:
                 ct.write(reviewer.rRank)
             # writeln terminates the row
             ct.writeln(book.rank)