Beispiel #1
0
import sentiment
if __name__ == '__main__':
    # ``tr.train`` is called below, but only ``sentiment`` is imported in the
    # visible part of this script; import ``tr`` here so the call cannot fail
    # with a NameError. A repeated import is harmless if it is also imported
    # elsewhere.
    import tr

    # Available domains; the two indices below select the source/target pair.
    domain = ["books", "dvd", "electronics", "kitchen"]

    source_ind = 2  # "electronics"
    target_ind = 3  # "kitchen"

    # Build a shared representation for the source and the target domain.
    # Arguments, in order:
    #   1. the source domain
    #   2. the target domain
    #   3. number of pivots
    #   4. appearance threshold for pivots in source and target domain
    #   5. the embedding dimension, identical to the hidden layer dimension
    tr.train(domain[source_ind], domain[target_ind], 100, 10, 500)

    # Learn the classifier in the source domain and test it in the target
    # domain. The results, weights and all the meta-data will appear in the
    # source-target directory.
    # Arguments, in order:
    #   1. the source domain
    #   2. the target domain
    #   3. number of pivots
    #   4. appearance threshold for pivots in source and target domain
    #   5. the embedding dimension, identical to the hidden layer dimension
    #   6. the constant C for the logistic regression classifier
    sentiment.sent(domain[source_ind], domain[target_ind], 100, 10, 500, 0.1)

Beispiel #2
0
    # Remaining command-line options. ``parser`` (and the options that provide
    # ``args.tr`` / ``args.te``) are created above the visible part of this
    # snippet.
    parser.add_argument('-dim', help='number of hidden units (default = 100)', default=100, type=int)
    parser.add_argument('-min', help='minimum frequency for pivots (default = 10)', default=10, type=int)
    parser.add_argument('-piv', help='number of pivots (default = 500)', default=500, type=int)
    # NOTE(review): the help text says "svm" while the comment further down
    # says the classifier is logistic regression -- confirm which is correct.
    parser.add_argument('-c', help='C parameter for svm (default = 0.1)', default=0.1, type=float)
    args = parser.parse_args()



    print('Domain adaptation from {0} to {1}'.format(args.tr, args.te))

    # Build a shared representation for both the source and the target domain.
    # Parameter description as given by the original author:
    #   first param:  the source domain
    #   second param: the target domain
    #   third param:  number of pivots
    #   fourth param: appearance threshold for pivots in source and target domain
    #   fifth param:  the embedding dimension, identical to the hidden layer dimension
    #
    # NOTE(review): the call below passes ``args.dim`` in the third position
    # and ``args.piv`` in the fifth, which is the opposite of the description
    # above. Confirm against the signature of ``tr.train`` whether the
    # arguments (or the option names/help texts) are swapped.

    tr.train(args.tr, args.te, args.dim, args.min, args.piv)

    # Learn the classifier in the source domain and test it in the target domain.
    # The results, weights and all the meta-data will appear in the
    # source-target directory.
    # Parameter description as given by the original author:
    #   first param:  the source domain
    #   second param: the target domain
    #   third param:  number of pivots
    #   fourth param: appearance threshold for pivots in source and target domain
    #   fifth param:  the embedding dimension, identical to the hidden layer dimension
    #   sixth param:  the constant C used by the logistic regression classifier
    #
    # NOTE(review): same ordering question as for ``tr.train`` above --
    # ``args.dim`` is passed third and ``args.piv`` fifth.

    sentiment.sent(args.tr, args.te, args.dim, args.min, args.piv, args.c)

Beispiel #3
0
import time
from sentiment import sent


def extract(file):
    """Parse a text file into ``[prefix, text]`` pairs, one per line.

    For every line the first element is the fixed-width slice
    ``line[20:38]`` and the second element is everything after the first
    ``'>'`` with surrounding whitespace stripped. When a line contains no
    ``'>'``, ``str.find`` returns -1, so the whole stripped line is used.

    Args:
        file: Path of the text file to read.

    Returns:
        A list of two-element lists ``[line[20:38], text_after_marker]``,
        in file order. An empty file yields an empty list.
    """
    records = []
    # The context manager closes the handle even when reading fails; the
    # previous version left the file open.
    with open(file, 'r') as handle:
        for line in handle:
            marker = line.find('>')
            records.append([line[20:38], line[marker + 1:].strip()])
    return records


d = extract('twitterData.txt')
# Append mode: output accumulates across runs. The ``with`` block guarantees
# the file is flushed and closed, which the previous version never did.
with open("processedTweets", 'a') as f:
    for record in d:
        # Split the fixed-width prefix on single spaces (in place, as before).
        # The first two pieces are written below, so a prefix without a space
        # still raises IndexError.
        record[0] = record[0].split(" ")
        time.sleep(0.25)  # We are limited in our API calls per second
        # One CSV-style line: first piece, second piece, result of ``sent``.
        f.write(
            str(record[0][0]) + "," + str(record[0][1]) + "," +
            str(sent(record[1])) + "\n")
import tr
import sentiment
if __name__ == '__main__':
    # Available domains; the first two entries are used as source and target.
    domain = ["books", "kitchen", "dvd", "electronics"]
    src_domain, tgt_domain = domain[0], domain[1]

    # Step 1: build a shared representation for the source and target domain.
    # Positional arguments of ``tr.train``:
    #   1. the source domain
    #   2. the target domain
    #   3. number of pivots
    #   4. appearance threshold for pivots in source and target domain
    #   5. the embedding dimension, identical to the hidden layer dimension
    tr.train(src_domain, tgt_domain, 100, 10, 500)

    # Step 2: learn the classifier in the source domain and test it in the
    # target domain. The results, weights and all the meta-data will appear
    # in the source-target directory.
    # Positional arguments of ``sentiment.sent``:
    #   1. the source domain
    #   2. the target domain
    #   3. number of pivots
    #   4. appearance threshold for pivots in source and target domain
    #   5. the embedding dimension, identical to the hidden layer dimension
    #   6. the constant C for the logistic regression classifier
    sentiment.sent(src_domain, tgt_domain, 100, 10, 500, 0.1)

Beispiel #5
0
    # Sweep over domain pairs and pivot counts. ``lista``, ``k``, ``dim``,
    # ``rodadas`` and ``domain`` are defined above the visible part of this
    # snippet.
    # NOTE(review): ``conta`` is not used anywhere in the visible lines.
    conta = 0
    for i in lista:
        for kzinho in k:
            # Progress message (Portuguese): "<rodadas> rounds remaining".
            print("faltam " + str(rodadas) + " rodadas")
            rodadas = rodadas - 1

            print("pivots =" + str(kzinho))
            # Each entry of ``lista`` is indexed as [source index, target index].
            src = i[0]
            dst = i[1]
            # Start timestamp for this run; passed on to ``sentiment.sent``.
            # NOTE(review): this name would shadow a ``time`` module if one is
            # imported in the enclosing file -- confirm.
            time = datetime.datetime.now()
            print("loading....")
            # Train once per (pair, pivot count); 10 is passed as the fourth
            # argument.
            tr.train(domain[src], domain[dst], kzinho, 10)
            print("Sent....")
            # Evaluate every value in ``dim`` against the model trained above.
            for d in dim:
                print(d)
                sentiment.sent(domain[src], domain[dst], kzinho, 10, d, 0.1,
                               "logistic", "binario", time)
            # Print the wall-clock time at the end of this round.
            print(datetime.datetime.now())

    #[(a, b) for a in  for b in lista2]
    """for j in algorithms:
        for n in extraction:
            for i in lista:
                src = i[0]
                dst = i[1]
                print(datetime.datetime.now())
                print("loading....")
                tr.train(domain[src],domain[dst],500,10)
                print("Sent....")
                sentiment.sent(domain[src],domain[dst],500,10,50,0.1, "random")"""
        # Map the choice held in ``x`` (a string) to a one-element list of
        # algorithm names. ``x`` is assigned above the visible part of this
        # snippet; presumably it is user input read inside a retry loop --
        # confirm against the enclosing code.
        print(x)
        if x == '1':
            algorithms = ['logistic']
        elif x == '2':
            algorithms = ['random']
        elif x == '3':
            algorithms = ['tree']
        elif x == '4':
            algorithms = ['svm']
        else:
            # Unknown choice: print 130 blank lines, then report the bad
            # input. ``algorithms`` is not assigned on this path.
            print("\n" * 130)
            print(x + " IS AN INVALID INPUT, TRY AGAIN!")

    # Feature-extraction variants to evaluate for each selected algorithm.
    extraction = ['tfidf', 'idf', 'counter', 'binario']
    # NOTE(review): ``k`` is assigned here, but the calls below pass the
    # literal 500 rather than ``k``.
    k = 500

    # Every ordered pair of distinct indices in 0..3, used as
    # [source index, target index] into ``domain``.
    lista = [[0, 1], [0, 2], [0, 3], [1, 0], [1, 2], [1, 3], [2, 0], [2, 1],
             [2, 3], [3, 0], [3, 1], [3, 2]]

    # Run every combination of algorithm, extraction variant and domain pair.
    # ``tr.train`` is called again for each combination.
    for j in algorithms:
        for n in extraction:
            for i in lista:
                src = i[0]
                dst = i[1]
                # Start timestamp for this run; passed on to ``sentiment.sent``.
                # NOTE(review): this name would shadow a ``time`` module if
                # one is imported in the enclosing file -- confirm.
                time = datetime.datetime.now()
                print("loading....")
                tr.train(domain[src], domain[dst], 500, 10)
                print("Sent....")
                # ``j`` is the algorithm name and ``n`` the extraction variant.
                sentiment.sent(domain[src], domain[dst], 500, 10, 50, 0.1, j,
                               n, time)