def main(args):
    """Run one GP feature-construction experiment and write its report.

    Positional entries of ``args``:
        args[0]: run index; seeds ``random`` and names the report file
            ``iteration<args[0]>.txt``.
        args[1]: ``0`` turns supervised mode off, any other integer turns
            it on (stored in the global ``SUPERVISED``).
        args[2]: number of sub-trees per individual (stored in the global
            ``NO_SUB_IND``).

    Side effects: registers ``individual`` and ``population`` on the global
    ``toolbox``, reseeds ``random``, refits ``Core.classifier`` and
    overwrites ``Core.tarU_soft_label`` after every generation, and writes
    the report file once the run has finished.
    """
    global SUPERVISED, NO_SUB_IND, toolbox

    # Elitism: number of best individuals carried over unchanged,
    # never fewer than 10.
    NO_ELI = int(POP_SIZE * GP_ELI)
    if NO_ELI < 10:
        NO_ELI = 10

    filename = "iteration" + str(args[0]) + ".txt"
    run_index = int(args[0])
    SUPERVISED = int(args[1]) != 0
    NO_SUB_IND = int(args[2])

    toolbox.register("individual", tools.initRepeat, creator.Individual,
                     toolbox.sub_individual, n=NO_SUB_IND)
    toolbox.register("population", tools.initRepeat, list,
                     toolbox.individual, n=POP_SIZE)

    random.seed(1617**2 * run_index)

    # time.clock() was removed in Python 3.8; process_time() is its CPU-time
    # replacement. Fall back to clock() only on interpreters that lack it.
    timer = getattr(time, "process_time", None) or time.clock
    time_start = timer()

    pop = toolbox.population()
    hof = tools.HallOfFame(NO_ELI)

    # Evaluate the initial population.
    for ind, fit in zip(pop, toolbox.map(toolbox.evaluate, pop)):
        ind.fitness.values = fit
    hof.update(pop)

    # The report is collected as pieces and joined once at the end.
    report = ["Supervised: %r \n"
              "Number of sub tree: %d\n"
              "Source weight: %f\n"
              "Diff source and target weight: %f\n"
              "Target weight: %g \n" % (SUPERVISED, NO_SUB_IND,
                                        FitnessFunction.srcWeight,
                                        FitnessFunction.margWeight,
                                        FitnessFunction.tarWeight)]

    for gen in range(NGEN):
        print(gen)
        report.append("----Generation %i -----\n" % gen)

        # Select the next generation, leaving room for the elite, and work
        # on clones so the selected parents are not modified.
        selected = toolbox.select(pop, len(pop) - NO_ELI)
        offspring = [toolbox.clone(ind) for ind in selected]

        # Crossover on consecutive pairs.
        for i in range(1, len(offspring), 2):
            if random.random() < GP_CXPB:
                first, second = crossoverEach(offspring[i - 1], offspring[i])
                # Store the results back so the offspring list is correct
                # whether or not crossoverEach works in place.
                offspring[i - 1], offspring[i] = first, second
                del first.fitness.values
                del second.fitness.values

        # Mutation. This must act on the offspring: the old population is
        # discarded below, so mutating ``pop`` would have no effect at all.
        for child in offspring:
            if random.random() < GP_MUTBP:
                # NOTE(review): sub-tree 0 is never mutated because the range
                # starts at 1 -- kept from the original, confirm it is intended.
                for j in range(1, len(child)):
                    if random.random() < GP_MUTSUB:
                        mutated = toolbox.mutate(child[j])
                        # DEAP mutation operators return a one-element tuple;
                        # unwrap it so a tree (not a tuple) is stored.
                        if isinstance(mutated, tuple):
                            mutated = mutated[0]
                        child[j] = mutated
                del child.fitness.values

        # Put clones of the hall of fame back into the offspring (elitism).
        offspring.extend(toolbox.clone(ind) for ind in hof)

        # Evaluate only the individuals whose fitness was invalidated.
        invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
        for ind, fit in zip(invalid_ind,
                            toolbox.map(toolbox.evaluate, invalid_ind)):
            ind.fitness.values = fit

        hof.update(offspring)
        pop[:] = offspring

        # Fitness statistics of the new population.
        fits = [ind.fitness.values[0] for ind in pop]
        length = len(pop)
        mean = sum(fits) / length
        sum2 = sum(x * x for x in fits)
        std = abs(sum2 / length - mean**2)**0.5

        report.append("  Min %s\n" % min(fits))
        report.append("  Max %s\n" % max(fits))
        report.append("  Avg %s\n" % mean)
        report.append("  Std %s\n" % std)

        # Break the fitness of the current best individual into components.
        bestInd = hof[0]
        funcs = [toolbox.compile(expr=tree) for tree in bestInd]
        src_feature = GPUtility.buildNewFeatures(Core.src_feature, funcs)
        tarU_feature = GPUtility.buildNewFeatures(Core.tarU_feature, funcs)

        diff_kwargs = dict(src_feature=src_feature, src_label=Core.src_label,
                           classifier=Core.classifier,
                           tarU_feature=tarU_feature,
                           tarU_soft_label=Core.tarU_soft_label)
        if SUPERVISED:
            # Labelled target features are only needed in supervised mode.
            diff_kwargs["tarL_feature"] = GPUtility.buildNewFeatures(
                Core.tarL_feature, funcs)
            diff_kwargs["tarL_label"] = Core.tarL_label
        src_err, diff_marg, tar_err = FitnessFunction.domain_differece(
            **diff_kwargs)

        report.append(
            "  Source Error: %f \n  Diff Marg: %f \n  Target Error: %f \n"
            % (src_err, diff_marg, tar_err))

        acc = 1.0 - FitnessFunction.classification_error(
            training_feature=src_feature, training_label=Core.src_label,
            classifier=Core.classifier,
            testing_feature=tarU_feature, testing_label=Core.tarU_label)
        report.append("  Accuracy on unlabel target: " + str(acc) + "\n")

        # Refresh the pseudo labels of the unlabelled target data using the
        # features built by the current best individual.
        Core.classifier.fit(src_feature, Core.src_label)
        Core.tarU_soft_label = Core.classifier.predict(tarU_feature)

    time_elapsed = timer() - time_start

    # Final report for the best individual found.
    bestInd = hof[0]
    report.append("----Final -----\n")

    funcs = [toolbox.compile(expr=tree) for tree in bestInd]
    src_feature = GPUtility.buildNewFeatures(Core.src_feature, funcs)
    tarU_feature = GPUtility.buildNewFeatures(Core.tarU_feature, funcs)
    acc = 1.0 - FitnessFunction.classification_error(
        training_feature=src_feature, training_label=Core.src_label,
        classifier=Core.classifier,
        testing_feature=tarU_feature, testing_label=Core.tarU_label)
    report.append("Accuracy on the target (TL): %f\n" % acc)
    report.append("Accuracy on the target (No TL): %f\n" % (
        1.0 - FitnessFunction.classification_error(
            training_feature=Core.src_feature, training_label=Core.src_label,
            classifier=Core.classifier,
            testing_feature=Core.tarU_feature, testing_label=Core.tarU_label)))

    report.append("Computation time: %f\n" % time_elapsed)
    report.append("Number of features: %d\n" % len(bestInd))

    # The context manager guarantees the handle is closed even on error.
    with open(filename, 'w+') as out:
        out.write("".join(report))
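

# Example #2 (separator left over from the code-listing page this file was
# collected from; the stray "0" that followed it was page residue)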
def main(args):
    """Run one PSO feature-selection experiment and write its report.

    Positional entries of ``args``:
        args[0]: run index; seeds ``random`` and names the report file
            ``iteration<args[0]>.txt``.
        args[1]: stored in ``FitnessFunction.margVersion``.
        args[2]: stored in ``FitnessFunction.tarVersion``.

    Side effects: sets the version and weight attributes of
    ``FitnessFunction``, rebinds the globals ``i_stick``, ``i_pbest``,
    ``i_gbest`` and ``ustks`` after every generation, optionally dumps the
    selected data under ``OutGP/`` (when ``WRITE_OUT`` is true) and writes
    the report file once the run has finished.
    """
    global i_stick, i_pbest, i_gbest, ustks

    run_index = int(args[0])
    random.seed(1617**2 * run_index)

    FitnessFunction.margVersion = int(args[1])
    FitnessFunction.tarVersion = int(args[2])
    FitnessFunction.srcVersion = 1

    filename = "iteration" + str(args[0]) + ".txt"

    # time.clock() was removed in Python 3.8; process_time() is its CPU-time
    # replacement. Fall back to clock() only on interpreters that lack it.
    timer = getattr(time, "process_time", None) or time.clock
    time_start = timer()

    # Fixed weights of the fitness components.
    FitnessFunction.srcWeight = 1.0
    FitnessFunction.margWeight = 0.0
    FitnessFunction.tarWeight = 1.0

    # Initialize the swarm; ``best`` is the global best found so far.
    pop = toolbox.population(n=NPART)
    best = None

    # The report is collected as pieces and joined once at the end.
    report = [
        "Core classifier: %s\nSource weight: %f\nDiff source and target weight: %f\n"
        "Target weight: %g\nMarginal version: %d\nTarget version: %d\n" %
        (str(Core.classifier), FitnessFunction.srcWeight,
         FitnessFunction.margWeight, FitnessFunction.tarWeight,
         FitnessFunction.margVersion, FitnessFunction.tarVersion)]

    for g in range(NGEN):
        print(g)
        report.append("=====Gen %d=====\n" % g)

        for part in pop:
            part.fitness.values = toolbox.evaluate(part)

            # Update the personal best.
            if part.best is None or part.best.fitness < part.fitness:
                part.best = creator.Particle(part)
                part.best.fitness.values = part.fitness.values

            # Update the global best.
            if best is None or best.fitness < part.fitness:
                best = creator.Particle(part)
                best.fitness.values = part.fitness.values

        if TEST:
            print("is=", i_stick, "ip=", i_pbest, "ig=", i_gbest, "ustks=",
                  ustks)
            print("best=", best)
            print(best.fitness.values)
            print("\n")
            for i, part in enumerate(pop):
                print("Particle %d: " % i)
                print("Particle position:", part)
                print("Particle pbest:", part.best)
                print("Particle stickiness:", part.stk)
                print("\n")

        # Move every particle using the current global best.
        for part in pop:
            toolbox.update(part, best)

        # Report the fitness components of the global best; the selected
        # features are the positions whose entry equals 1.0.
        indices = [index for index, entry in enumerate(best) if entry == 1.0]
        src_feature = Core.Xs[:, indices]
        tar_feature = Core.Xt[:, indices]
        src_err, diff_marg, tar_err = FitnessFunction.domain_differece(
            src_feature=src_feature, src_label=Core.Ys,
            classifier=Core.classifier, tar_feature=tar_feature)

        report.append(
            "  Source Error: %f \n  Marginal Difference: %f \n  Target Error: %f \n"
            % (src_err, diff_marg, tar_err))
        report.append("  Fitness function of real best: %f\n" %
                      best.fitness.values[0])
        acc = 1.0 - FitnessFunction.classification_error(
            training_feature=src_feature,
            training_label=Core.Ys,
            classifier=Core.classifier,
            testing_feature=tar_feature,
            testing_label=Core.Yt)
        report.append("  Accuracy on unlabel target: " + str(acc) + "\n")
        report.append("  Position:" + str(best) + "\n")
        print(src_err, acc, best.fitness.values[0])

        # Linearly move the swarm parameters for the next generation.
        i_stick = is_up - (is_up - is_low) * (g + 1) / NGEN
        i_gbest = (1 - i_stick) / (pg_rate + 1)
        i_pbest = pg_rate * i_gbest
        ustks = ustks_low + (ustks_up - ustks_low) * (g + 1) / NGEN

    time_elapsed = timer() - time_start
    report.append("----Final -----\n")
    indices = [index for index, entry in enumerate(best) if entry == 1.0]
    src_feature = Core.Xs[:, indices]
    tar_feature = Core.Xt[:, indices]

    if WRITE_OUT:
        # Dump the selected features with the label as the last column.
        src_full = np.concatenate(
            (src_feature, np.reshape(Core.Ys, (Core.Ys.shape[0], 1))), axis=1)
        tar_full = np.concatenate(
            (tar_feature, np.reshape(Core.Yt, (Core.Yt.shape[0], 1))), axis=1)
        np.savetxt("OutGP/Source", src_full, delimiter=",")
        np.savetxt("OutGP/Target", tar_full, delimiter=",")
        # The context manager closes (and flushes) the feature-count file.
        with open("OutGP/noFeatures", "w") as nf_file:
            nf_file.write(str(len(indices)))

    # Each entry: classifier, label for the accuracy with the selected
    # features, format for the accuracy with the original features.
    # NOTE(review): labels are reproduced verbatim (including "Linear DT" and
    # "Linear NB") because existing reports may be parsed by their text.
    evaluations = (
        (Core.classifier,
         "Accuracy of the core classifier: ",
         "Accuracy on the target (No TL) (core classifier): %f\n\n"),
        (LinearSVC(random_state=1617),
         "Accuracy of the Linear SVM classifier: ",
         "Accuracy on the target (No TL) of Linear SVM: %f\n\n"),
        (DecisionTreeClassifier(random_state=1617),
         "Accuracy of the Linear DT classifier: ",
         "Accuracy on the target (No TL) of DT: %f\n\n"),
        (GaussianNB(),
         "Accuracy of the Linear NB classifier: ",
         "Accuracy on the target (No TL) of NB: %f\n\n"),
    )
    for classifier, selected_label, original_format in evaluations:
        acc = 1.0 - FitnessFunction.classification_error(
            training_feature=src_feature,
            training_label=Core.Ys,
            classifier=classifier,
            testing_feature=tar_feature,
            testing_label=Core.Yt)
        report.append(selected_label + str(acc) + "\n")
        report.append(original_format %
                      (1.0 - FitnessFunction.classification_error(
                          training_feature=Core.ori_src_feature,
                          training_label=Core.Ys,
                          classifier=classifier,
                          testing_feature=Core.ori_tar_feature,
                          testing_label=Core.Yt)))

    report.append("Computation time: %f\n" % time_elapsed)
    report.append("Number of features: %d\n" % len(indices))
    report.append(str(best))

    # The context manager guarantees the handle is closed even on error.
    with open(filename, 'w+') as output_file:
        output_file.write("".join(report))
def main(args):
    """Run one PSO feature-selection experiment with pseudo-label refresh.

    Positional entries of ``args``:
        args[0]: run index; seeds ``random`` and names the report file
            ``iteration<args[0]>.txt``.
        args[1]: conditional version, stored in ``FitnessFunction.tarVersion``.

    Side effects: sets the global ``SUPERVISED`` to ``False``, sets the
    weight attributes of ``FitnessFunction``, rebinds the globals
    ``i_stick``, ``i_pbest``, ``i_gbest`` and ``ustks`` after every
    generation, may refit ``Core.classifier`` and overwrite
    ``Core.tarU_soft_label``, and writes the report file once the run has
    finished.
    """
    global i_stick, i_pbest, i_gbest, ustks, SUPERVISED

    run_index = int(args[0])
    random.seed(1617 ** 2 * run_index)
    filename = "iteration" + str(args[0]) + ".txt"

    # time.clock() was removed in Python 3.8; process_time() is its CPU-time
    # replacement. Fall back to clock() only on interpreters that lack it.
    timer = getattr(time, "process_time", None) or time.clock
    time_start = timer()

    SUPERVISED = False

    cond_index = int(args[1])
    FitnessFunction.tarVersion = cond_index

    # Fixed weights of the fitness components.
    FitnessFunction.srcWeight = 0.0
    FitnessFunction.margWeight = 1.0
    FitnessFunction.tarWeight = 0.0

    # Initialize the swarm; ``best`` is the global best found so far.
    pop = toolbox.population(n=NPART)
    best = None

    # The report is collected as pieces and joined once at the end.
    report = ["Supervised: %r \n"
              "Source weight: %f\n"
              "Diff source and target weight: %f\n"
              "Target weight: %g\n"
              "Conditional version: %d\n" % (SUPERVISED,
                                             FitnessFunction.srcWeight,
                                             FitnessFunction.margWeight,
                                             FitnessFunction.tarWeight,
                                             FitnessFunction.tarVersion)]

    for g in range(NGEN):
        print(g)
        report.append("=====Gen %d=====\n" % g)

        for part in pop:
            part.fitness.values = toolbox.evaluate(part)

            # Update the personal best.
            if part.best is None or part.best.fitness < part.fitness:
                part.best = creator.Particle(part)
                part.best.fitness.values = part.fitness.values

            # Update the global best.
            if best is None or best.fitness < part.fitness:
                best = creator.Particle(part)
                best.fitness.values = part.fitness.values

        if TEST:
            print("is=", i_stick, "ip=", i_pbest, "ig=", i_gbest, "ustks=", ustks )
            print("best=", best)
            print(best.fitness.values)
            print("\n")
            for i, part in enumerate(pop):
                print("Particle %d: " % i)
                print("Particle position:",part)
                print("Particle pbest:",part.best)
                print("Particle stickiness:",part.stk)
                print("\n")

        # Move every particle using the current global best.
        for part in pop:
            toolbox.update(part, best)

        # Report the fitness components of the global best; the selected
        # features are the positions whose entry equals 1.0.
        indices = [index for index, entry in enumerate(best) if entry == 1.0]
        src_feature = Core.src_feature[:, indices]
        tarU_feature = Core.tarU_feature[:, indices]

        diff_kwargs = dict(src_feature=src_feature, src_label=Core.src_label,
                           classifier=Core.classifier,
                           tarU_feature=tarU_feature)
        if SUPERVISED:
            # Labelled target features are only needed in supervised mode.
            diff_kwargs["tarL_feature"] = Core.tarL_feature[:, indices]
            diff_kwargs["tarL_label"] = Core.tarL_label
        src_err, diff_marg, tar_err = FitnessFunction.domain_differece(
            **diff_kwargs)

        report.append(
            "  Source Error: %f \n  Diff Marg: %f \n  Target Error: %f \n"
            % (src_err, diff_marg, tar_err))
        report.append("  Fitness function of real best: %f\n"
                      % best.fitness.values[0])
        acc = 1.0 - FitnessFunction.classification_error(
            training_feature=src_feature, training_label=Core.src_label,
            classifier=Core.classifier,
            testing_feature=tarU_feature, testing_label=Core.tarU_label)
        report.append("  Accuracy on unlabel target: " + str(acc) + "\n")
        report.append("  Position:" + str(best) + "\n")

        # Linearly move the swarm parameters for the next generation.
        i_stick = is_up - (is_up - is_low)*(g+1)/NGEN
        i_gbest = (1-i_stick)/(pg_rate+1)
        i_pbest = pg_rate*i_gbest
        ustks = ustks_low + (ustks_up-ustks_low)*(g+1)/NGEN

        # Refresh the pseudo labels every 10th generation for conditional
        # version 3. This needs a boolean ``and``: with ``&`` the expression
        # parsed as ``cond_index == (3 & (g % 10)) == 0`` and could only be
        # true for cond_index 0.
        # NOTE(review): the earlier comment named version 2 while the code
        # tested 3; the value from the code is kept -- confirm which is meant.
        if cond_index == 3 and g % 10 == 0:
            Core.classifier.fit(src_feature, Core.src_label)
            Core.tarU_soft_label = Core.classifier.predict(tarU_feature)
            FitnessFunction.set_weight(src_feature, Core.src_label,
                                       tarU_feature, Core.tarU_soft_label)
            # The stored fitness of gbest and of every pbest was computed
            # with the old pseudo labels, so recompute it. The trailing comma
            # builds the one-element tuple that ``fitness.values`` is given.
            best.fitness.values = FitnessFunction.fitness_function(
                src_feature, Core.src_label,
                tarU_feature, Core.tarU_soft_label,
                Core.classifier),
            for part in pop:
                p_indices = [index for index, entry in enumerate(part.best)
                             if entry == 1.0]
                p_src_feature = Core.src_feature[:, p_indices]
                p_tarU_feature = Core.tarU_feature[:, p_indices]
                part.best.fitness.values = FitnessFunction.fitness_function(
                    p_src_feature, Core.src_label,
                    p_tarU_feature, Core.tarU_soft_label,
                    Core.classifier),

    time_elapsed = timer() - time_start
    report.append("----Final -----\n")
    indices = [index for index, entry in enumerate(best) if entry == 1.0]
    src_feature = Core.src_feature[:, indices]
    tarU_feature = Core.tarU_feature[:, indices]
    acc = 1.0 - FitnessFunction.classification_error(
        training_feature=src_feature, training_label=Core.src_label,
        classifier=Core.classifier,
        testing_feature=tarU_feature, testing_label=Core.tarU_label)
    report.append("Accuracy on unlabel target: " + str(acc) + "\n")
    report.append("Accuracy on the target (No TL): %f\n" % (
        1.0 - FitnessFunction.classification_error(
            training_feature=Core.src_feature, training_label=Core.src_label,
            classifier=Core.classifier,
            testing_feature=Core.tarU_feature, testing_label=Core.tarU_label)))
    report.append("Computation time: %f\n" % time_elapsed)
    report.append("Number of features: %d\n" % len(indices))
    report.append(str(best))

    # The context manager guarantees the handle is closed even on error.
    with open(filename, 'w+') as out:
        out.write("".join(report))