Ejemplo n.º 1
0
def singleuser(username):
    """Rank one user's tweets and print/return an anomaly verdict.

    Fetches the user's tweets via ``get_tweets``, computes the time,
    similarity, URL, WOT and adult-content rankings, combines them into a
    weighted score (``FAL``) and maps that score to a verdict code.

    Args:
        username: Identifier passed straight through to ``get_tweets``.

    Returns:
        One of:
        * ``[0, 0, 0, 0, 0, 0, -1]`` when the first entry of the returned
          time list is ``-1`` (presumably a fetch-failure marker produced
          by ``get_tweets`` -- confirm against that helper).
        * ``["empty"]`` when either the text list or the time list is empty.
        * ``[time, similarity, url, wot, adult, FAL, verdict]`` otherwise,
          where ``verdict`` is 0 (non anomalous), 1 (suspected) or
          2 (anomalous).
    """
    tweet_textList, tweet_timeList = get_tweets(username)

    # Sentinel check: a leading -1 in the time list short-circuits scoring.
    if tweet_timeList and tweet_timeList[0] == -1:
        return [0, 0, 0, 0, 0, 0, -1]

    # Nothing to score unless both lists carry data.
    if len(tweet_textList) == 0 or len(tweet_timeList) == 0:
        return ["empty"]

    time_rank = rank_time(tweet_timeList)
    similarity_rank = rank_similarity(tweet_textList)
    # The combined helper returns its values in (url, adult, wot) order.
    url_rank, adult_rank, wot_rank = rank_url_adult_wot(tweet_textList)

    # Every ranking goes through truncate(value, 2) before being reported.
    time_rank, similarity_rank, url_rank, wot_rank, adult_rank = [
        truncate(score, 2)
        for score in (time_rank, similarity_rank, url_rank, wot_rank, adult_rank)
    ]

    print("Output")
    print("------")
    print("URL RANKING : ", url_rank)
    print("SIMILARITY RANKING : ", similarity_rank)
    print("WOT RANKING : ", wot_rank)
    print("ADULT CONTENT : ", adult_rank)
    print("TIME RANKING : ", time_rank)

    # An adult-content ranking of exactly 10 leaves the score at 0;
    # otherwise the score is the weighted sum of the four other rankings.
    if adult_rank != 10:
        fal_score = time_rank*0.15+similarity_rank*0.25+url_rank*0.3+wot_rank*0.3
    else:
        fal_score = 0

    # The verdict is decided on the untruncated score.
    if 4 <= fal_score <= 5:
        verdict = 1
    elif 5 < fal_score <= 10:
        verdict = 2
    else:
        verdict = 0

    fal_score = truncate(fal_score, 2)
    print("FAL : ", fal_score)

    verdict_messages = (
        "The entered user is Non Anomalous",
        "The entered user is suspected",
        "The entered user is Anomalous",
    )
    print(verdict_messages[verdict])

    return [time_rank, similarity_rank, url_rank, wot_rank, adult_rank,
            fal_score, verdict]
Ejemplo n.º 2
0
def analyser(need_fetch, size=0):
    """Build (optionally) the ranking dataset and evaluate five classifiers.

    When ``need_fetch`` is truthy, up to ``size`` usernames read from
    ``Followers.csv`` are scored and the resulting rows
    ``[time, similarity, url, wot, adult, FAL, label]`` are written to
    ``dataset_gen.csv`` (overwriting it). In every case ``dataset_gen.csv``
    is then loaded and passed to the KNN, Naive Bayes, Decision Tree,
    Random Forest and SVM helpers; each returned confusion matrix is printed
    and handed to ``findAccuracy``.

    Args:
        need_fetch: If truthy, regenerate ``dataset_gen.csv`` before
            classifying; otherwise reuse the file already on disk.
        size: Maximum number of usernames to process. Values larger than the
            number of available usernames are clamped instead of raising
            ``IndexError``; zero or negative values process nothing.

    Returns:
        ``[cm_knn, cm_nb, cm_dt, cm_rf, cm_svm]`` -- the objects returned by
        the five classifier helpers, in that order.
    """
    start = datetime.datetime.now()
    dataset = pd.read_csv('Followers.csv')
    # NOTE(review): read_csv already consumes the header row, so iloc[1:]
    # additionally skips the first data row -- confirm this is intended.
    usernames = dataset.iloc[1:, [0]].values
    lines = []
    if need_fetch:
        # Slicing (rather than indexing by range(size)) keeps an oversized
        # ``size`` from raising IndexError before any results are saved.
        for username in usernames[:max(size, 0)]:
            print(username[0])
            tweet_textList, tweet_timeList = get_tweets(username[0])
            if len(tweet_textList) == 0 or len(tweet_timeList) == 0:
                print("Empty")
                continue

            a = rank_time(tweet_timeList)
            b = rank_similarity(tweet_textList)
            # The combined helper returns (url, adult, wot) in that order.
            c, e, d = rank_url_adult_wot(tweet_textList)

            print("URL RANKING : ", c)
            print("SIMILARITY RANKING : ", b)
            print("WOT RANKING : ", d)
            print("ADULT CONTENT : ", e)
            print("TIME RANKING : ", a)

            # An adult-content ranking of exactly 10 leaves FAL at 0;
            # otherwise FAL is the weighted sum of the other four rankings.
            FAL = 0
            if e != 10:
                FAL = a*0.15+b*0.25+c*0.3+d*0.3

            # 0 = default, 1 = FAL in [4, 5], 2 = FAL in (5, 10].
            label = 0
            if 4 <= FAL <= 5:
                label = 1
            elif 5 < FAL <= 10:
                label = 2

            lines.append([a, b, c, d, e, FAL, label])

        # newline='' is required by the csv module so that no blank rows are
        # emitted on platforms that translate line endings. The ``with``
        # block closes the file, so no explicit close() is needed.
        with open('dataset_gen.csv', 'w', newline='') as writeFile:
            csv.writer(writeFile).writerows(lines)

    # NOTE(review): the file is written without a header row, yet read back
    # with read_csv's default header handling, so the first row is treated
    # as column names -- confirm the classifier helpers expect that.
    dataset = pd.read_csv('dataset_gen.csv')
    cm_knn = KNN(dataset)
    cm_nb = NaiveBayesClassifier(dataset)
    cm_dt = DecisionTree(dataset)
    cm_rf = RandomForest(dataset)
    cm_svm = SVM(dataset)

    # One (heading, underline, matrix) entry per classifier. Driving the
    # report from this table guarantees each section's accuracy is computed
    # from its own matrix (the Random Forest section previously reused the
    # KNN matrix).
    reports = [
        ("KNN Classification", "==================", cm_knn),
        ("Naive Bayes Classification", "==========================", cm_nb),
        ("Decistion Tree Classification", "=============================", cm_dt),
        ("Random Forest Classification", "============================", cm_rf),
        ("SVM Classification", "==================", cm_svm),
    ]
    for heading, underline, matrix in reports:
        print(heading)
        print(underline)
        print(matrix)
        findAccuracy(matrix, matrix.shape)
        print()

    print("Completed")
    end = datetime.datetime.now()
    print("end time ", end)
    print(end-start)
    return [cm_knn, cm_nb, cm_dt, cm_rf, cm_svm]