コード例 #1
0
def calculateInterestingBusinessStatistics(cross_time_graphs, not_mergeable_businessids, bnss_score_all_time_map):
    """Print per-time-slice scores and fake-review counts for non-mergeable businesses.

    Parameters:
        cross_time_graphs: mapping of time_key -> graph for that time slice.
        not_mergeable_businessids: mapping of time_key -> collection of business
            ids flagged as not mergeable in that slice.
        bnss_score_all_time_map: mapping of business id -> {time_key: score}.

    For every business id that appears in ``not_mergeable_businessids`` a
    dictionary keyed by time_key is built.  Each value is either the
    placeholder ``'-'`` (no score recorded for that slice) or the tuple
    ``(score, (negative_count, other_count), is_interesting)`` where the two
    counters tally the fake review edges of that business in that slice:
    edges whose sentiment is ``SIAUtil.REVIEW_TYPE_NEGATIVE`` go into the
    first counter, every other edge into the second.

    The resulting dictionary is printed; nothing is returned.
    """
    no_score_marker = '-'

    interesting_bnss_across_time = set()
    for time_key in not_mergeable_businessids:
        interesting_bnss_across_time.update(not_mergeable_businessids[time_key])

    bnss_score_across_time_with_interestingMarked = dict()
    for bnss_key in interesting_bnss_across_time:
        marked_scores = dict()
        for time_key in cross_time_graphs:
            scores_for_bnss = bnss_score_all_time_map[bnss_key]
            if time_key not in scores_for_bnss:
                marked_scores[time_key] = no_score_marker
                continue
            is_interesting = (time_key in not_mergeable_businessids
                              and bnss_key in not_mergeable_businessids[time_key])
            marked_scores[time_key] = (scores_for_bnss[time_key], (0, 0), is_interesting)
        bnss_score_across_time_with_interestingMarked[bnss_key] = marked_scores

    for time_key in cross_time_graphs:
        # NOTE(review): calculateBeliefVals() is called without running
        # doBeliefPropagationIterative() first -- presumably intentional
        # (hence "dummy"), confirm against the LBP class.
        dummy_lbp = LBP(cross_time_graphs[time_key])
        (_fakeUsers, _honestUsers, _unclassifiedUsers,
         _badProducts, _goodProducts, _unclassifiedProducts,
         fakeReviewEdges, _realReviewEdges, _unclassifiedReviewEdges) = dummy_lbp.calculateBeliefVals()

        for fakeReviewEdge in fakeReviewEdges:
            (siaObject1, siaObject2) = fakeReviewEdge
            # The product may be either end of the edge; pick whichever it is.
            if siaObject1.getNodeType() == SIAUtil.PRODUCT:
                bnssIdFromEdge = siaObject1.getId()
            else:
                bnssIdFromEdge = siaObject2.getId()

            if bnssIdFromEdge not in bnss_score_across_time_with_interestingMarked:
                continue

            entry = bnss_score_across_time_with_interestingMarked[bnssIdFromEdge][time_key]
            if entry == no_score_marker:
                # No score for this slice: there is no counter tuple to update.
                continue

            score, (edge_sentiment_negative, edge_sentiment_positive), isInteresting = entry
            if dummy_lbp.getEdgeDataForNodes(*fakeReviewEdge).getReviewSentiment() == SIAUtil.REVIEW_TYPE_NEGATIVE:
                edge_sentiment_negative += 1
            else:
                edge_sentiment_positive += 1
            bnss_score_across_time_with_interestingMarked[bnssIdFromEdge][time_key] = \
                (score, (edge_sentiment_negative, edge_sentiment_positive), isInteresting)

    print(bnss_score_across_time_with_interestingMarked)
コード例 #2
0
    def runLBP(self):
        """Run iterative LBP on ``self.graph`` and record the flagged edges.

        Belief propagation is run with ``self.limit`` as its argument.  The
        (user id, business id) pair of every edge returned in the fake-review
        slot of ``calculateBeliefVals()`` is stored as a set in
        ``self.to_be_removed_usr_bnss_edges``, and the size of that set is
        printed.
        """
        lbp = LBP(self.graph)
        lbp.doBeliefPropagationIterative(self.limit)

        # calculateBeliefVals() yields nine groups; only the fake review
        # edges (seventh position) are needed here.
        (_fake_users, _honest_users, _unknown_users,
         _bad_products, _good_products, _unknown_products,
         fake_edges, _real_edges, _unknown_edges) = lbp.calculateBeliefVals()

        flagged_pairs = set()
        for node_pair in fake_edges:
            user_id = lbp.getEdgeDataForNodes(*node_pair).getUserId()
            business_id = lbp.getEdgeDataForNodes(*node_pair).getBusinessID()
            flagged_pairs.add((user_id, business_id))

        self.to_be_removed_usr_bnss_edges = flagged_pairs
        print(len(self.to_be_removed_usr_bnss_edges))
コード例 #3
0
def _safeRatio(numerator, denominator):
    """Return numerator / denominator as a float, or 0.0 if the denominator is zero."""
    if not denominator:
        return 0.0
    return float(numerator) / denominator


def _printStatistic(label, value):
    """Print ``label`` and ``value`` on one line separated by a single space."""
    print('%s %s' % (label, value))


def runParentLBPAndCompareStatistics(certifiedFakesFromTemporalAlgo,certifiedRealFromTemporalAlgo, parent_graph):
    """Run LBP on the full graph and print statistics comparing three labelings.

    Parameters:
        certifiedFakesFromTemporalAlgo: iterable of (user id, business id)
            pairs the temporal algorithm marked as fake.
        certifiedRealFromTemporalAlgo: iterable of (user id, business id)
            pairs the temporal algorithm marked as real.
        parent_graph: the non-temporal graph containing every review edge.

    Three sources of labels are compared as sets of review ids:
      * the temporal algorithm (the two pair collections above),
      * plain LBP run for 50 iterations on ``parent_graph``,
      * Yelp, where a review counts as fake when ``isRecommended()`` is false.

    Set sizes, intersections and differences are printed, followed by
    accuracy, precision, recall and F1 of the temporal algorithm and of plain
    LBP measured against the Yelp labels.  All ratios are computed as floats
    and are reported as 0.0 when their denominator is zero.

    Nothing is returned.
    """
    print("------------------------------------Running Parent LBP along with all Time Edges--------------------------------------")
    # run LBP on a non temporal full graph for comparison
    parent_lbp = LBP(parent_graph)
    parent_lbp.doBeliefPropagationIterative(50)
    (_parent_lbp_fakeUsers, _parent_lbp_honestUsers, _parent_lbp_unclassifiedUsers,
     _parent_lbp_badProducts, _parent_lbp_goodProducts, _parent_lbp_unclassifiedProducts,
     parent_lbp_fakeReviewEdges, parent_lbp_realReviewEdges,
     _parent_lbp_unclassifiedReviewEdges) = parent_lbp.calculateBeliefVals()

    print("-----------------------------------------------Statistics------------------------------------------------------------------")
    fakeReviewsInParentLBP = set(parent_lbp.getEdgeDataForNodes(*edge).getId()
                                 for edge in parent_lbp_fakeReviewEdges)
    realReviewsInParentLBP = set(parent_lbp.getEdgeDataForNodes(*edge).getId()
                                 for edge in parent_lbp_realReviewEdges)

    # One pass over the edges: split by Yelp's recommendation flag and count them.
    fakeReviewsFromYelp = set()
    realReviewsFromYelp = set()
    totalReviews = 0
    for edge in parent_graph.edges():
        totalReviews += 1
        review = parent_lbp.getEdgeDataForNodes(*edge)
        if review.isRecommended():
            realReviewsFromYelp.add(review.getId())
        else:
            fakeReviewsFromYelp.add(review.getId())

    # NOTE(review): getReviewIdsForUsrBnssId() must return a hashable value
    # for these sets to be built -- confirm against the LBP class.
    fakeReviewsFromTemporalAlgo = set(parent_lbp.getReviewIdsForUsrBnssId(usrId, bnssId)
                                      for (usrId, bnssId) in certifiedFakesFromTemporalAlgo)
    realReviewsFromTemporalAlgo = set(parent_lbp.getReviewIdsForUsrBnssId(usrId, bnssId)
                                      for (usrId, bnssId) in certifiedRealFromTemporalAlgo)

    # Set sizes
    _printStatistic('Fake Reviews Temporal Algo', len(fakeReviewsFromTemporalAlgo))
    _printStatistic('Fake Reviews LBP', len(fakeReviewsInParentLBP))
    _printStatistic('Fake Reviews Yelp', len(fakeReviewsFromYelp))

    _printStatistic('Real Reviews Temporal Algo', len(realReviewsFromTemporalAlgo))
    _printStatistic('Real Reviews LBP', len(realReviewsInParentLBP))
    _printStatistic('Real Reviews Yelp', len(realReviewsFromYelp))

    # Agreement between the three labelings
    _printStatistic('Intersection of FakeReviews between Yelp with TemporalLBP:',
                    len(fakeReviewsFromYelp & fakeReviewsFromTemporalAlgo))
    _printStatistic('Intersection of FakeReviews between Yelp with LBP:',
                    len(fakeReviewsFromYelp & fakeReviewsInParentLBP))
    _printStatistic('Intersection of FakeReviews between Temporal LBP with LBP:',
                    len(fakeReviewsFromTemporalAlgo & fakeReviewsInParentLBP))
    _printStatistic('Intersection FakeReviews Across Yelp,Temporal and LBP',
                    len(fakeReviewsFromTemporalAlgo & fakeReviewsInParentLBP & fakeReviewsFromYelp))

    _printStatistic('Intersection of RealReviews between Yelp with TemporalLBP:',
                    len(realReviewsFromYelp & realReviewsFromTemporalAlgo))
    _printStatistic('Intersection of RealReviews between Yelp with LBP:',
                    len(realReviewsFromYelp & realReviewsInParentLBP))
    _printStatistic('Intersection of RealReviews between Temporal LBP with LBP:',
                    len(realReviewsFromTemporalAlgo & realReviewsInParentLBP))
    # The three-way intersection of *real* reviews uses Yelp's real set.
    _printStatistic('Intersection RealReviews Across Yelp,Temporal and LBP',
                    len(realReviewsFromTemporalAlgo & realReviewsInParentLBP & realReviewsFromYelp))

    # Disagreement between the labelings
    _printStatistic('Fake Review - Yelp-TemporalLBP:', len(fakeReviewsFromYelp - fakeReviewsFromTemporalAlgo))
    _printStatistic('Fake Reviews - TemporalLBP-Yelp:', len(fakeReviewsFromTemporalAlgo - fakeReviewsFromYelp))

    _printStatistic('Fake Reviews Yelp-LBP:', len(fakeReviewsFromYelp - fakeReviewsInParentLBP))
    _printStatistic('Fake Reviews LBP-Yelp:', len(fakeReviewsInParentLBP - fakeReviewsFromYelp))

    _printStatistic('Fake Reviews Temporal LBP-LBP:', len(fakeReviewsFromTemporalAlgo - fakeReviewsInParentLBP))
    _printStatistic('Fake Reviews LBP-TemporalLBP:', len(fakeReviewsInParentLBP - fakeReviewsFromTemporalAlgo))

    # Yelp labels are the reference: "positive" means fake.
    trueNegativesTemporalAlgo = len(realReviewsFromYelp & realReviewsFromTemporalAlgo)
    truePositivesTemporalAlgo = len(fakeReviewsFromYelp & fakeReviewsFromTemporalAlgo)

    trueNegativesLBP = len(realReviewsFromYelp & realReviewsInParentLBP)
    truePositivesLBP = len(fakeReviewsFromYelp & fakeReviewsInParentLBP)

    accuracyOfTemporalAlgo = _safeRatio(truePositivesTemporalAlgo + trueNegativesTemporalAlgo, totalReviews)
    accuracyOfLBP = _safeRatio(truePositivesLBP + trueNegativesLBP, totalReviews)

    precisionOfTemporalAlgo = _safeRatio(truePositivesTemporalAlgo, len(fakeReviewsFromTemporalAlgo))
    precisionOfLBP = _safeRatio(truePositivesLBP, len(fakeReviewsInParentLBP))

    recallOfTemporalAlgo = _safeRatio(truePositivesTemporalAlgo, len(fakeReviewsFromYelp))
    recallOfLBP = _safeRatio(truePositivesLBP, len(fakeReviewsFromYelp))

    F1ScoreOfTemporalAlgo = _safeRatio(2 * precisionOfTemporalAlgo * recallOfTemporalAlgo,
                                       precisionOfTemporalAlgo + recallOfTemporalAlgo)
    F1ScoreOfLBP = _safeRatio(2 * precisionOfLBP * recallOfLBP,
                              precisionOfLBP + recallOfLBP)

    _printStatistic('Accuracy of Temporal LBP', accuracyOfTemporalAlgo)
    _printStatistic('Accuracy of LBP', accuracyOfLBP)

    _printStatistic('Precision of Temporal LBP', precisionOfTemporalAlgo)
    _printStatistic('Precision of LBP', precisionOfLBP)

    _printStatistic('Recall of Temporal LBP', recallOfTemporalAlgo)
    _printStatistic('Recall of LBP', recallOfLBP)

    _printStatistic('F1Score of Temporal LBP', F1ScoreOfTemporalAlgo)
    _printStatistic('F1Score of LBP', F1ScoreOfLBP)
コード例 #4
0
def _addBusinessReviewEdges(source_graph, bnss_ids, target_graph, excluded_usr_bnss_pairs=None):
    """Copy the review edges of the given businesses from source_graph into target_graph.

    For every business id in ``bnss_ids`` each neighbouring user's review is
    deep-copied and added to ``target_graph`` between that graph's own
    business and user nodes (looked up by id).  When
    ``excluded_usr_bnss_pairs`` is given, edges whose (user id, business id)
    pair is in it are skipped.
    """
    for bnssid in bnss_ids:
        bnss = source_graph.getBusiness(bnssid)
        for usr in source_graph.neighbors(bnss):
            if excluded_usr_bnss_pairs is not None and \
                    (usr.getId(), bnss.getId()) in excluded_usr_bnss_pairs:
                continue
            review = deepcopy(source_graph.get_edge_data(usr, bnss)[SIAUtil.REVIEW_EDGE_DICT_CONST])
            target_graph.add_edge(target_graph.getBusiness(bnss.getId()),
                                  target_graph.getUser(usr.getId()),
                                  {SIAUtil.REVIEW_EDGE_DICT_CONST: review})


def mergeTimeBasedGraphsWithNotMergeableIds(alltimeD_access_merge_graph,not_mergeable_businessids, cross_time_graphs):
    """Merge the non-mergeable businesses' reviews into the super graph and classify edges.

    Parameters:
        alltimeD_access_merge_graph: the merged super graph; it is modified in
            place (review edges are added to it).
        not_mergeable_businessids: mapping of time_key -> business ids that
            could not be merged for that time slice.
        cross_time_graphs: mapping of time_key -> graph for that time slice.

    Steps:
      1. For each time_key, deep-copy the super graph, add all review edges of
         that slice's non-mergeable businesses, run LBP (50 iterations) and
         collect the (user id, business id) pair of every fake review edge.
      2. Add to the real super graph the review edges of the non-mergeable
         businesses that were not collected in step 1.
      3. Run LBP (50 iterations) on the super graph; its fake edges are added
         to the collected set and its real edges form the second result.

    Returns:
        A tuple ``(fake_pairs, real_pairs)`` of sets of
        (user id, business id) pairs.
    """
    to_be_removed_edge_between_user_bnss = set()

    # Step 1.  The per-time-key LBP runs are sequential; the file previously
    # carried a commented-out LBPRunnerThread variant of this loop, and the
    # elapsed time printed below is what that variant was meant to reduce.
    started_at = datetime.now()
    for time_key in not_mergeable_businessids:
        copied_all_timeD_access_merge_graph = deepcopy(alltimeD_access_merge_graph)
        _addBusinessReviewEdges(cross_time_graphs[time_key],
                                not_mergeable_businessids[time_key],
                                copied_all_timeD_access_merge_graph)

        copy_merge_lbp = LBP(copied_all_timeD_access_merge_graph)
        copy_merge_lbp.doBeliefPropagationIterative(50)
        (_fakeUsers, _honestUsers, _unclassifiedUsers,
         _badProducts, _goodProducts, _unclassifiedProducts,
         fakeReviewEdges, _realReviewEdges, _unclassifiedReviewEdges) = copy_merge_lbp.calculateBeliefVals()

        for (s1, s2) in fakeReviewEdges:
            # Store pairs user-first regardless of the edge's node order.
            if s1.getNodeType() == SIAUtil.USER:
                to_be_removed_edge_between_user_bnss.add((s1.getId(), s2.getId()))
            else:
                to_be_removed_edge_between_user_bnss.add((s2.getId(), s1.getId()))
    finished_at = datetime.now()
    print('%s %s' % ('Time to be reduced', finished_at - started_at))

    # Step 2.  From the drastically changed businesses we found the fake edges
    # above; add every other edge to the super graph.
    for time_key in not_mergeable_businessids:
        _addBusinessReviewEdges(cross_time_graphs[time_key],
                                not_mergeable_businessids[time_key],
                                alltimeD_access_merge_graph,
                                to_be_removed_edge_between_user_bnss)

    # Step 3.
    print("------------------------------------Running Final Merge LBP--------------------------------------")
    merge_lbp = LBP(alltimeD_access_merge_graph)
    merge_lbp.doBeliefPropagationIterative(50)
    (_fakeUsers, _honestUsers, _unclassifiedUsers,
     _badProducts, _goodProducts, _unclassifiedProducts,
     fakeReviewEdges, realReviewEdges, _unclassifiedReviewEdges) = merge_lbp.calculateBeliefVals()

    for edge in fakeReviewEdges:
        review = merge_lbp.getEdgeDataForNodes(*edge)
        to_be_removed_edge_between_user_bnss.add((review.getUserId(), review.getBusinessID()))

    certifiedRealFromTemporalAlgo = set()
    for edge in realReviewEdges:
        review = merge_lbp.getEdgeDataForNodes(*edge)
        certifiedRealFromTemporalAlgo.add((review.getUserId(), review.getBusinessID()))

    return (to_be_removed_edge_between_user_bnss, certifiedRealFromTemporalAlgo)