Example 1
import argparse

import sentiment
import tr

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    # (the -tr/-te argument definitions below are reconstructed from the
    # args.tr / args.te uses later in this snippet)
    parser.add_argument('-tr', help='source domain', required=True)
    parser.add_argument('-te', help='target domain', required=True)
    parser.add_argument('-dim', help='number of hidden units (default = 100)', default=100, type=int)
    parser.add_argument('-min', help='minimum frequency for pivots (default = 10)', default=10, type=int)
    parser.add_argument('-piv', help='number of pivots (default = 500)', default=500, type=int)
    parser.add_argument('-c', help='C parameter for svm (default = 0.1)', default=0.1, type=float)
    args = parser.parse_args()



    print('Domain adaptation from {0} to {1}'.format(args.tr, args.te))

    # making a shared representation for both source domain and target domain
    # first param: the source domain
    # second param: the target domain
    # third param: number of pivots
    # fourth param: appearance threshold for pivots in source and target domain
    # fifth parameter: the embedding dimension, identical to the hidden layer dimension

    tr.train(args.tr, args.te, args.piv, args.min, args.dim)

    # learning the classifier in the source domain and testing in the target domain
    # the results, weights and all the meta-data will appear in source-target directory
    # first param: the source domain
    # second param: the target domain
    # third param: number of pivots
    # fourth param: appearance threshold for pivots in source and target domain
    # fifth param: the embedding dimension identical to the hidden layer dimension
    # sixth param: we use logistic regression as our classifier; it takes the constant C for its learning

    sentiment.sent(args.tr, args.te, args.piv, args.min, args.dim, args.c)
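
All of these examples assume local tr and sentiment modules whose implementations are not shown. A minimal stub of the interface the comments describe (signatures inferred from the parameter lists above; hypothetical, not the real code) can be used to dry-run the snippets:

# Hypothetical stubs: train would live in tr.py and sent in sentiment.py,
# matching the documented order (source, target, pivots, threshold, dim[, C]).
def train(source, target, num_pivots, min_freq, dim):
    """Stand-in for learning the shared source/target representation."""
    print(f"train: {source}->{target}, pivots={num_pivots}, "
          f"min_freq={min_freq}, dim={dim}")

def sent(source, target, num_pivots, min_freq, dim, C):
    """Stand-in for training the classifier on source and testing on target."""
    print(f"sent: {source}->{target}, pivots={num_pivots}, "
          f"min_freq={min_freq}, dim={dim}, C={C}")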

Example 2
import tr
import sentiment
if __name__ == '__main__':
    domain = []
    domain.append("books")
    domain.append("dvd")
    domain.append("electronics")
    domain.append("kitchen")

    source_ind = 2
    target_ind = 3
    # making a shared representation for both source domain and target domain
    # first param: the source domain
    # second param: the target domain
    # third param: number of pivots
    # fourth param: appearance threshold for pivots in source and target domain
    # fifth parameter: the embedding dimension, identical to the hidden layer dimension

    tr.train(domain[source_ind], domain[target_ind], 100, 10, 500)

    # learning the classifier in the source domain and testing in the target domain
    # the results, weights and all the meta-data will appear in source-target directory
    # first param: the source domain
    # second param: the target domain
    # third param: number of pivots
    # fourth param: appearance threshold for pivots in source and target domain
    # fifth param: the embedding dimension identical to the hidden layer dimension
    # sixth param: we use logistic regression as our classifier; it takes the constant C for its learning

    sentiment.sent(domain[source_ind], domain[target_ind], 100, 10, 500, 0.1)
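
Rather than hard-coding source_ind and target_ind, all ordered domain pairs can be enumerated; with four domains this gives 12 source-to-target adaptations. A short sketch, assuming the same tr/sentiment interface as in the examples above:

import itertools

import sentiment
import tr

domains = ["books", "dvd", "electronics", "kitchen"]
for source, target in itertools.permutations(domains, 2):
    tr.train(source, target, 100, 10, 500)
    sentiment.sent(source, target, 100, 10, 500, 0.1)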

Example 3
import datetime

import sentiment
import tr

if __name__ == '__main__':
    # (header reconstructed for this truncated snippet; the candidate
    # values for k and dim are assumptions, not from the original)
    domain = ["books", "kitchen", "dvd", "electronics"]
    k = [100, 500]          # candidate numbers of pivots (assumed)
    dim = [50, 100, 500]    # candidate embedding dimensions (assumed)

    lista = [[1, 0], [0, 1]]

    rodadas = len(k) * len(dim) * len(lista)
    conta = 0
    for i in lista:
        for kzinho in k:
            print("faltam " + str(rodadas) + " rodadas")
            rodadas = rodadas - 1

            print("pivots =" + str(kzinho))
            src = i[0]
            dst = i[1]
            time = datetime.datetime.now()
            print("loading....")
            tr.train(domain[src], domain[dst], kzinho, 10)
            print("Sent....")
            for d in dim:
                print(d)
                sentiment.sent(domain[src], domain[dst], kzinho, 10, d, 0.1,
                               "logistic", "binario", time)
            print(datetime.datetime.now())

    """for j in algorithms:
        for n in extraction:
            for i in lista:
                src = i[0]
                dst = i[1]
                print(datetime.datetime.now())
                print("loading....")
import tr
import sentiment
if __name__ == '__main__':
    domain = []
    domain.append("books")
    domain.append("kitchen")
    domain.append("dvd")
    domain.append("electronics")

    # making a shared representation for both source domain and target domain
    # first param: the source domain
    # second param: the target domain
    # third param: number of pivots
    # fourth param: appearance threshold for pivots in source and target domain
    # fifth parameter: the embedding dimension, identical to the hidden layer dimension

    tr.train(domain[0], domain[1], 100, 10, 500)

    # learning the classifier in the source domain and testing in the target domain
    # the results, weights and all the meta-data will appear in source-target directory
    # first param: the source domain
    # second param: the target domain
    # third param: number of pivots
    # fourth param: appearance threshold for pivots in source and target domain
    # fifth param: the embedding dimension identical to the hidden layer dimension
    # sixth param: we use logistic regression as our classifier; it takes the constant C for its learning

    sentiment.sent(domain[0], domain[1], 100, 10, 500, 0.1)

Example 5
import tr
import sentiment

if __name__ == '__main__':
    domain = ["books", "kitchen", "dvd", "electronics"]

    ## Making a shared representation for both source domain and target domain.
    # first param: the source domain
    # second param: the target domain
    # third param: number of pivots
    # fourth param: appearance threshold for pivots in source and target domain
    tr.train(domain[0], domain[1], 500, 10)

    ## Learning the classifier in the source domain and testing in the target domain
    ## the results, weights and all the meta-data will appear in source-target directory.
    # first param: the source domain
    # second param: the target domain
    # third param: number of pivots
    # fourth param: appearance threshold for pivots in source and target domain
    # fifth param: The SVD dimension
    # sixth param: we use logistic regression as our classifier; it takes the constant C for its learning
    sentiment.sent(domain[0], domain[1], 500, 10, 50, 0.1)
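
Note that this variant's tr.train takes only four arguments (no embedding dimension), and sentiment.sent's fifth argument is an SVD dimension rather than a hidden-layer size. A matching hypothetical stub, for comparison with the five-argument variant sketched after Example 1:

def train(source, target, num_pivots, min_freq):
    """Stand-in for the four-argument variant of tr.train; per the comments
    above, the dimensionality reduction (SVD) is applied later, in sent."""
    print(f"train: {source}->{target}, pivots={num_pivots}, min_freq={min_freq}")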
Example 6
import tr

if __name__ == '__main__':
    # (header reconstructed for this truncated snippet; domain order assumed)
    domain = ["books", "kitchen", "dvd", "electronics"]

    sorting_criteria = []
    sorting_criteria.append("RMI")

    # training the PBLM model in order to create a structure-aware
    # shared representation for both source domain and target domain
    # input:
    # first param: the source domain
    # second param: the target domain
    # third param: number of pivots
    # fourth param: appearance threshold for pivots in source and target domain
    # fifth param: the embedding dimension
    # sixth param: maximum number of words to work with
    # seventh param: maximum review length to work with
    # eighth param: hidden units number for the PBLM model
    # output: the software will create a corresponding directory with the model

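    # ninth and tenth params: two extra options used by this variant (the
    # literal 2 and the pivot-sorting criterion chosen above); these are
    # not covered by the original comment block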
    tr.train(domain[0], domain[1], 100, 10, 128, 10000, 500, 256, 2,
             sorting_criteria[0])

    # training the sentiment LSTM using PBLM's shared representation
    # for both source domain and target domain
    # this phase needs a corresponding trained PBLM model in order to work
    # first param: the source domain
    # second param: the target domain
    # third param: number of pivots
    # fourth param: maximum review length to work with
    # fifth param: the embedding dimension
    # sixth param: maximum number of words to work with
    # seventh param: hidden units number for the PBLM model
    # eighth param: hidden units number for the lstm model
    # output: the results file will be created in the same directory
    # of the model under the results directory in the "lstm" dir
    #sentiment.PBLM_LSTM(domain[0], domain[1], 500, 500, 256, 10000, 256, 256)

Example 7
import datetime

import sentiment
import tr

if __name__ == '__main__':
    # (header reconstructed for this truncated snippet; the domain list and
    # the prompt loop are assumptions based on the code below)
    domain = ["books", "kitchen", "dvd", "electronics"]

    algorithms = []
    while not algorithms:  # re-prompt until a valid choice sets `algorithms`
        x = input("classifier? 1=logistic, 2=random, 3=tree, 4=svm: ")
        print(x)
        if x == '1':
            algorithms = ['logistic']
        elif x == '2':
            algorithms = ['random']
        elif x == '3':
            algorithms = ['tree']
        elif x == '4':
            algorithms = ['svm']
        else:
            print("\n" * 130)
            print(x + " IS AN INVALID INPUT, TRY AGAIN!")

    extraction = ['tfidf', 'idf', 'counter', 'binario']
    k = 500

    lista = [[0, 1], [0, 2], [0, 3], [1, 0], [1, 2], [1, 3], [2, 0], [2, 1],
             [2, 3], [3, 0], [3, 1], [3, 2]]

    for j in algorithms:
        for n in extraction:
            for i in lista:
                src = i[0]
                dst = i[1]
                time = datetime.datetime.now()
                print("loading....")
                tr.train(domain[src], domain[dst], 500, 10)
                print("Sent....")
                sentiment.sent(domain[src], domain[dst], 500, 10, 50, 0.1, j,
                               n, time)
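
With one selected classifier, four extraction schemes, and twelve domain pairs, the grid above performs 1 * 4 * 12 = 48 train/test runs. A quick self-contained sanity check of that count:

algorithms = ['logistic']
extraction = ['tfidf', 'idf', 'counter', 'binario']
pairs = 12  # ordered pairs of 4 domains: 4 * 3
print(len(algorithms) * len(extraction) * pairs)  # -> 48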
Example 8
        # making a shared representation for both source domain and target domain
        # first param: the source domain
        # second param: the target domain
        # third param: number of pivots
        # fourth param: appearance threshold for pivots in source and target domain
        # fifth parameter: the embedding dimension, identical to the hidden layer dimension

        pivots = []
        if args.method == 'list':
            pivots = read_pivot_file(args.pivot_file)

        tr.train(domain[source_ind],
                 domain[target_ind],
                 args.pivots,
                 args.doc_freq,
                 args.nhid,
                 pivot_method=args.method,
                 pivots=pivots)

        # learning the classifier in the source domain and testing in the target domain
        # the results, weights and all the meta-data will appear in source-target directory
        # first param: the source domain
        # second param: the target domain
        # third param: number of pivots
        # fourth param: appearance threshold for pivots in source and target domain
        # fifth param: the embedding dimension identical to the hidden layer dimension
        # sixth param: we use logistic regression as our classifier; it takes the constant C for its learning

        sentiment.sent(domain[source_ind], domain[target_ind], args.pivots,
                       args.doc_freq, args.nhid, 0.1)
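
read_pivot_file is referenced above but not defined in this excerpt. A minimal sketch, assuming the pivot file simply lists one pivot feature per line (the name comes from the snippet; the format is an assumption):

def read_pivot_file(path):
    # read one pivot feature per non-empty line (assumed file format)
    with open(path) as f:
        return [line.strip() for line in f if line.strip()]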
Example 9
import tr
import sentiment
import ensemble
if __name__ == '__main__':
    domain = []
    domain.append("books")  #0
    domain.append("kitchen")  #1
    domain.append("dvd")  #2
    domain.append("electronics")  #3

    # making a shared representation for both source domain and target domain
    # first param: the source domain
    # second param: the target domain
    # third param: number of pivots
    # fourth param: appearance threshold for pivots in source and target domain
    # fifth parameter: the embedding dimension, identical to the hidden layer dimension

    tr.train(domain[2], domain[3], 200, 10, 500)

    # learning the classifier in the source domain and testing in the target domain
    # the results, weights and all the meta-data will appear in source-target directory
    # first param: the source1 domain
    # second param: the source2 domain
    # third param: the source3 domain
    # fourth param: the target domain
    # fifth param: number of pivots
    # sixth param: appearance threshold for pivots in source and target domain
    # seventh param: the embedding dimension identical to the hidden layer dimension
    # eighth param: we use logistic regression as our classifier; it takes the constant C for its learning
    ensemble.sent(domain[0], domain[1], domain[3], domain[2], 100, 10, 500,
                  0.1)
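
As written, the example trains only the dvd-to-electronics representation, while ensemble.sent combines three sources (books, kitchen, electronics) against the dvd target. Presumably each source-to-target representation has to be trained first; a hedged sketch of the matching calls, reusing the pivot count, threshold, and dimension from the ensemble call:

    # one shared representation per source -> target pair (assumed prerequisite)
    for source in (domain[0], domain[1], domain[3]):  # books, kitchen, electronics
        tr.train(source, domain[2], 100, 10, 500)     # target: dvd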