Example #1
0
def anchors_noise_offsets(anchors, offsets, rows, cols, spacing, z_step, x_indices_str, y_indices_str, x_minscale, y_minscale, x_maxscale, y_maxscale):
    """Shift a grid of anchors along two offset directions by noise-driven amounts.

    One anchor is produced per grid cell, where the grid has
    ceil(rows / spacing) rows and ceil(cols / spacing) columns.  For each
    cell two noise samples are taken with ``pnoise3`` at the cell's
    fractional grid position and ``z_step``; each sample is passed through
    ``lerp`` together with the matching min/max scale, and the resulting
    scales weight the x and y offset vectors added to the cell's anchor.

    Args:
        anchors: sequence of anchor vectors.  When it holds exactly one
            anchor, that anchor is reused for every cell; otherwise anchors
            are consumed one per cell in row-major order.
        offsets: offset vectors handed to ``offset_from_string``.
        rows, cols, spacing: integers defining the grid size.
        z_step: third coordinate passed to ``pnoise3``.
        x_indices_str, y_indices_str: index strings handed to
            ``offset_from_string`` to build the x and y offset vectors.
        x_minscale, x_maxscale, y_minscale, y_maxscale: bounds passed to
            ``lerp`` for each direction.

    Returns:
        ``np.array`` of the shifted anchors, one entry per grid cell.
    """
    from noise import pnoise3
    from plat.interpolate import lerp

    only_anchor = anchors[0] if len(anchors) == 1 else None

    dim = len(anchors[0])
    x_offset = offset_from_string(x_indices_str, offsets, dim)
    y_offset = offset_from_string(y_indices_str, offsets, dim)

    # Ceiling division.  Floor division (`//`) keeps the counts integral so
    # that range() accepts them; true division would produce floats.
    num_row_anchors = (rows + spacing - 1) // spacing
    num_col_anchors = (cols + spacing - 1) // spacing

    newanchors = []
    cur_anchor_index = 0
    for j in range(num_row_anchors):
        y_frac = float(j) / num_row_anchors
        for i in range(num_col_anchors):
            if only_anchor is None:
                cur_anchor = anchors[cur_anchor_index]
                cur_anchor_index += 1
            else:
                cur_anchor = only_anchor
            x_frac = float(i) / num_col_anchors
            # 0.5 * (1 + noise) rescales the sample before interpolation
            # (presumably from [-1, 1] to [0, 1] -- confirm against pnoise3).
            n1 = 0.5 * (1.0 + pnoise3(x_frac, y_frac, z_step, octaves=4, repeatz=2))
            # The +100 shift samples a different region of the noise field
            # so the second value differs from the first.
            n2 = 0.5 * (1.0 + pnoise3(100+x_frac, 100+y_frac, z_step, octaves=4, repeatz=2))
            x_scale = lerp(n1, x_minscale, x_maxscale)
            y_scale = lerp(n2, y_minscale, y_maxscale)
            newanchors.append(cur_anchor + x_scale * x_offset + y_scale * y_offset)
    return np.array(newanchors)
Example #2
0
def check_lazy_initialize(args, dmodel, classifier, vector_offsets):
    """Load the model and the offset vectors on first use.

    Anything that is already set (not None) is passed through untouched.
    The classifier is never loaded here; it is returned exactly as given.

    Args:
        args: parsed options; reads ``model``, ``anchor_offset`` and
            ``anchor_indexes``.
        dmodel: existing model, or None to load one from ``args.model``.
        classifier: returned unchanged.
        vector_offsets: existing list of offset vectors, or None to build
            one from ``args.anchor_offset``.

    Returns:
        Tuple ``(dmodel, classifier, vector_offsets)``.
    """
    need_model = dmodel is None and args.model is not None
    if need_model:
        print('Loading saved model...')
        dmodel = DiscGenModel(filename=args.model)

    # Nothing more to do when offsets already exist or no source was given.
    if vector_offsets is not None or args.anchor_offset is None:
        return dmodel, classifier, vector_offsets

    offsets = vectors_from_json_filelist(real_glob(args.anchor_offset))
    dim = len(offsets[0])
    index_strs = args.anchor_indexes.split(",")

    # The first entry is negated; every later entry is used as-is.
    vector_offsets = [-1 * offset_from_string(index_strs[0], offsets, dim)]
    for index_str in index_strs[1:]:
        vector_offsets.append(offset_from_string(index_str, offsets, dim))

    return dmodel, classifier, vector_offsets
Example #3
0
def anchors_json_offsets(anchors, offsets, rows, cols, spacing, z_step,
                         x_indices_str, y_indices_str, x_minscale, y_minscale,
                         x_maxscale, y_maxscale, range_data):
    """Shift a grid of anchors by scales looked up from ``range_data``.

    One anchor is produced per grid cell, where the grid has
    ceil(rows / spacing) rows and ceil(cols / spacing) columns.  The same
    pair of scales is applied to every cell: ``range_data[z_step][0]`` and
    ``range_data[z_step][1]`` are each passed through ``lerp`` with the
    matching min/max scale.

    Args:
        anchors: sequence of anchor vectors.  When it holds exactly one
            anchor, that anchor is reused for every cell; otherwise anchors
            are consumed one per cell in row-major order.
        offsets: offset vectors handed to ``offset_from_string``.
        rows, cols, spacing: integers defining the grid size.
        z_step: index into ``range_data``.
        x_indices_str, y_indices_str: index strings handed to
            ``offset_from_string`` to build the x and y offset vectors.
        x_minscale, x_maxscale, y_minscale, y_maxscale: bounds passed to
            ``lerp`` for each direction.
        range_data: indexable by ``z_step``; each entry provides the x value
            at position 0 and the y value at position 1.

    Returns:
        ``np.array`` of the shifted anchors, one entry per grid cell.
    """
    only_anchor = anchors[0] if len(anchors) == 1 else None

    dim = len(anchors[0])
    x_offset = offset_from_string(x_indices_str, offsets, dim)
    y_offset = offset_from_string(y_indices_str, offsets, dim)

    # Ceiling division.  Floor division (`//`) keeps the counts integral so
    # that range() accepts them; true division would produce floats.
    num_row_anchors = (rows + spacing - 1) // spacing
    num_col_anchors = (cols + spacing - 1) // spacing

    # The scales depend only on z_step, not on the cell, so compute them once.
    n1 = range_data[z_step][0]
    n2 = range_data[z_step][1]
    x_scale = lerp(n1, x_minscale, x_maxscale)
    y_scale = lerp(n2, y_minscale, y_maxscale)

    newanchors = []
    cur_anchor_index = 0
    for _ in range(num_row_anchors * num_col_anchors):
        if only_anchor is None:
            cur_anchor = anchors[cur_anchor_index]
            cur_anchor_index += 1
        else:
            cur_anchor = only_anchor
        newanchors.append(cur_anchor + x_scale * x_offset +
                          y_scale * y_offset)
    return np.array(newanchors)
Example #4
0
def apply_anchor_offsets(anchor, offsets, a, b, a_indices_str, b_indices_str):
    """Return ``anchor`` moved along two offset vectors.

    ``a`` and ``b`` are remapped with ``2 * (value - 0.5)`` (so 0.5 means no
    movement) and used as weights for the offset vectors that
    ``offset_from_string`` builds from ``a_indices_str`` and
    ``b_indices_str``.
    """
    weight_a = 2.0 * (a - 0.5)
    weight_b = 2.0 * (b - 0.5)
    size = len(anchor)
    direction_a = offset_from_string(a_indices_str, offsets, size)
    direction_b = offset_from_string(b_indices_str, offsets, size)
    return anchor + weight_a * direction_a + weight_b * direction_b
Example #5
0
def apply_anchor_offsets(anchor, offsets, a, b, a_indices_str, b_indices_str):
    """Return ``anchor`` moved along two offset vectors.

    ``a`` and ``b`` are remapped with ``2 * (value - 0.5)`` (so 0.5 means no
    movement) and used as weights for the offset vectors that
    ``offset_from_string`` builds from ``a_indices_str`` and
    ``b_indices_str``.
    """
    weight_a = 2.0 * (a - 0.5)
    weight_b = 2.0 * (b - 0.5)
    size = len(anchor)
    direction_a = offset_from_string(a_indices_str, offsets, size)
    direction_b = offset_from_string(b_indices_str, offsets, size)
    return anchor + weight_a * direction_a + weight_b * direction_b
Example #6
0
def anchors_from_offsets(anchor, offsets, x_indices_str, y_indices_str, x_minscale, y_minscale, x_maxscale, y_maxscale):
    """Build the four corner anchors spanned by the min/max x and y scales.

    The corners are returned in the order (x min, y min), (x min, y max),
    (x max, y min), (x max, y max), each being ``anchor`` plus the scaled
    x and y offset vectors from ``offset_from_string``.

    Returns:
        ``np.array`` holding the four corner anchors.
    """
    size = len(anchor)
    x_offset = offset_from_string(x_indices_str, offsets, size)
    y_offset = offset_from_string(y_indices_str, offsets, size)

    # The outer loop runs over x so that y varies fastest.
    corners = [
        anchor + x_scale * x_offset + y_scale * y_offset
        for x_scale in (x_minscale, x_maxscale)
        for y_scale in (y_minscale, y_maxscale)
    ]
    return np.array(corners)
Example #7
0
def anchors_from_offsets(anchor, offsets, x_indices_str, y_indices_str,
                         x_minscale, y_minscale, x_maxscale, y_maxscale):
    """Build the four corner anchors spanned by the min/max x and y scales.

    The corners are returned in the order (x min, y min), (x min, y max),
    (x max, y min), (x max, y max), each being ``anchor`` plus the scaled
    x and y offset vectors from ``offset_from_string``.

    Returns:
        ``np.array`` holding the four corner anchors.
    """
    size = len(anchor)
    x_offset = offset_from_string(x_indices_str, offsets, size)
    y_offset = offset_from_string(y_indices_str, offsets, size)

    # The outer loop runs over x so that y varies fastest.
    corners = [
        anchor + x_scale * x_offset + y_scale * y_offset
        for x_scale in (x_minscale, x_maxscale)
        for y_scale in (y_minscale, y_maxscale)
    ]
    return np.array(corners)
Example #8
0
def check_lazy_initialize(args, dmodel, smile_offsets):
    """Load the model and the combined offset vector on first use.

    Anything that is already set (not None) is passed through untouched.

    Args:
        args: parsed options; reads ``model``, ``model_file``,
            ``model_type``, ``anchor_offset`` and ``anchor_indexes``.
        dmodel: existing model, or None to load one through ``zoo``.
        smile_offsets: existing offsets list, or None to build one.

    Returns:
        Tuple ``(dmodel, smile_offsets)``.  A freshly built
        ``smile_offsets`` is a one-element list holding the sum of the
        offset vectors named by each comma-separated entry of
        ``args.anchor_indexes``.
    """
    if dmodel is None:
        if args.model is not None or args.model_file is not None:
            print('Finding saved model...')
            dmodel = zoo.load_model(args.model, args.model_file,
                                    args.model_type)

    if smile_offsets is None and args.anchor_offset is not None:
        offsets = get_json_vectors(args.anchor_offset)
        dim = len(offsets[0])
        index_strs = args.anchor_indexes.split(",")
        # Start from the first entry and accumulate the rest in place.
        total = offset_from_string(index_strs[0], offsets, dim)
        for index_str in index_strs[1:]:
            total += offset_from_string(index_str, offsets, dim)
        smile_offsets = [total]

    return dmodel, smile_offsets
Example #9
0
def anchors_noise_offsets(anchors, offsets, rows, cols, spacing, z_step,
                          x_indices_str, y_indices_str, x_minscale, y_minscale,
                          x_maxscale, y_maxscale):
    """Shift a grid of anchors along two offset directions by noise-driven amounts.

    One anchor is produced per grid cell, where the grid has
    ceil(rows / spacing) rows and ceil(cols / spacing) columns.  For each
    cell two noise samples are taken with ``pnoise3`` at the cell's
    fractional grid position and ``z_step``; each sample is passed through
    ``lerp`` together with the matching min/max scale, and the resulting
    scales weight the x and y offset vectors added to the cell's anchor.

    Args:
        anchors: sequence of anchor vectors.  When it holds exactly one
            anchor, that anchor is reused for every cell; otherwise anchors
            are consumed one per cell in row-major order.
        offsets: offset vectors handed to ``offset_from_string``.
        rows, cols, spacing: integers defining the grid size.
        z_step: third coordinate passed to ``pnoise3``.
        x_indices_str, y_indices_str: index strings handed to
            ``offset_from_string`` to build the x and y offset vectors.
        x_minscale, x_maxscale, y_minscale, y_maxscale: bounds passed to
            ``lerp`` for each direction.

    Returns:
        ``np.array`` of the shifted anchors, one entry per grid cell.
    """
    from noise import pnoise3
    from plat.interpolate import lerp

    only_anchor = anchors[0] if len(anchors) == 1 else None

    dim = len(anchors[0])
    x_offset = offset_from_string(x_indices_str, offsets, dim)
    y_offset = offset_from_string(y_indices_str, offsets, dim)

    # Ceiling division.  Floor division (`//`) keeps the counts integral so
    # that range() accepts them; true division would produce floats.
    num_row_anchors = (rows + spacing - 1) // spacing
    num_col_anchors = (cols + spacing - 1) // spacing

    newanchors = []
    cur_anchor_index = 0
    for j in range(num_row_anchors):
        y_frac = float(j) / num_row_anchors
        for i in range(num_col_anchors):
            if only_anchor is None:
                cur_anchor = anchors[cur_anchor_index]
                cur_anchor_index += 1
            else:
                cur_anchor = only_anchor
            x_frac = float(i) / num_col_anchors
            # 0.5 * (1 + noise) rescales the sample before interpolation
            # (presumably from [-1, 1] to [0, 1] -- confirm against pnoise3).
            n1 = 0.5 * (1.0 +
                        pnoise3(x_frac, y_frac, z_step, octaves=4, repeatz=2))
            # The +100 shift samples a different region of the noise field
            # so the second value differs from the first.
            n2 = 0.5 * (1.0 + pnoise3(
                100 + x_frac, 100 + y_frac, z_step, octaves=4, repeatz=2))
            x_scale = lerp(n1, x_minscale, x_maxscale)
            y_scale = lerp(n2, y_minscale, y_maxscale)
            newanchors.append(cur_anchor + x_scale * x_offset +
                              y_scale * y_offset)
    return np.array(newanchors)
Example #10
0
def anchors_wave_offsets(anchors, offsets, rows, cols, spacing, radial_wave,
                         clip_wave, z_step, x_indices_str, x_minscale,
                         x_maxscale):
    """Shift a grid of anchors along one offset direction by a wave value.

    One anchor is produced per grid cell, where the grid has
    ceil(rows / spacing) rows and ceil(cols / spacing) columns.  Each cell
    gets a position fraction -- based on its distance from the grid centre
    when ``radial_wave`` is true, otherwise on its column -- which is added
    to ``z_step`` and passed to ``compute_wave``.  The result goes through
    ``lerp`` with the min/max scale to weight the offset vector.

    Args:
        anchors: sequence of anchor vectors.  When it holds exactly one
            anchor, that anchor is reused for every cell; otherwise anchors
            are consumed one per cell in row-major order.
        offsets: offset vectors handed to ``offset_from_string``.
        rows, cols, spacing: integers defining the grid size.
        radial_wave: choose the centre-distance fraction instead of the
            column fraction.
        clip_wave: passed through to ``compute_wave``.
        z_step: value added to each cell's position fraction.
        x_indices_str: index string handed to ``offset_from_string``.
        x_minscale, x_maxscale: bounds passed to ``lerp``.

    Returns:
        ``np.array`` of the shifted anchors, one entry per grid cell.
    """
    only_anchor = anchors[0] if len(anchors) == 1 else None

    dim = len(anchors[0])
    x_offset = offset_from_string(x_indices_str, offsets, dim)

    # Ceiling division.  Floor division (`//`) keeps the counts integral so
    # that range() accepts them; true division would produce floats.
    num_row_anchors = (rows + spacing - 1) // spacing
    num_col_anchors = (cols + spacing - 1) // spacing

    newanchors = []
    cur_anchor_index = 0
    # Grid centre in (column, row) cell coordinates; the corner cell [0, 0]
    # is used as the reference for the largest distance.
    center_pt = [(num_col_anchors - 1) / 2.0, (num_row_anchors - 1) / 2.0]
    max_dist = distance_2d([0, 0], center_pt)
    for j in range(num_row_anchors):
        for i in range(num_col_anchors):
            if only_anchor is None:
                cur_anchor = anchors[cur_anchor_index]
                cur_anchor_index += 1
            else:
                cur_anchor = only_anchor
            cur_dist = distance_2d([i, j], center_pt)
            if radial_wave:
                x_frac = (max_dist - cur_dist) / max_dist
            else:
                x_frac = float(i) / num_col_anchors
            wave_val = z_step + x_frac
            n1 = compute_wave(wave_val, clip_wave)
            x_scale = lerp(n1, x_minscale, x_maxscale)
            newanchors.append(cur_anchor + x_scale * x_offset)
    return np.array(newanchors)
Example #11
0
def get_global_offset(offsets, indices_str, scale):
    """Return the offset vector selected by ``indices_str``, multiplied by ``scale``.

    The vector dimension is taken from the length of ``offsets[0]``.
    """
    combined = offset_from_string(indices_str, offsets, len(offsets[0]))
    return scale * combined
Example #12
0
def atvec(parser, context, args):
    """Command-line entry point for building or evaluating attribute vectors.

    Registers its options on ``parser``, parses ``args`` and then runs
    exactly one mode:

    * ``--avg-diff A,B``: save mean(B) - mean(A) as a single attribute
      vector, then exit.
    * ``--roc``: run ``do_roc`` for the vector selected by
      ``--attribute-indices``, then exit.
    * ``--thresh``: run ``do_thresh`` on the vectors from
      ``--attribute-vectors``, then exit.
    * otherwise: compute with/without-attribute averages (balanced over
      two attributes, balanced over a list, or plain), convert them to
      attribute vectors and save them when ``--outfile`` is given.

    Args:
        parser: argparse-style parser the options are added to.
        context: not used by this function.
        args: argument list handed to ``parser.parse_args``.

    Raises:
        SystemExit: via ``sys.exit(0)`` once the avg-diff, roc or thresh
            mode has finished.
    """
    parser.add_argument('--dataset',
                        dest='dataset',
                        default=None,
                        help="Source dataset (for labels).")
    parser.add_argument('--labels',
                        dest='labels',
                        default=None,
                        help="Text file with 0/1 labels.")
    parser.add_argument(
        '--split',
        dest='split',
        default="train",
        help=
        "Which split to use from the dataset (train/nontrain/valid/test/any).")
    parser.add_argument("--num-attribs",
                        dest='num_attribs',
                        type=int,
                        default=40,
                        help="Number of attributes (labes)")
    parser.add_argument("--z-dim",
                        dest='z_dim',
                        type=int,
                        default=100,
                        help="z dimension of vectors")
    parser.add_argument("--encoded-vectors",
                        type=str,
                        default=None,
                        help="Comma separated list of json arrays")
    parser.add_argument(
        '--thresh',
        dest='thresh',
        default=False,
        action='store_true',
        help="Compute thresholds for attribute vectors classifiers")
    parser.add_argument('--roc',
                        dest='roc',
                        default=False,
                        action='store_true',
                        help="ROC curve of selected attribute vectors")
    parser.add_argument("--attribute-vectors",
                        dest='attribute_vectors',
                        default=None,
                        help="use json file as source of attribute vectors")
    parser.add_argument(
        "--attribute-thresholds",
        dest='attribute_thresholds',
        default=None,
        help="use these non-zero values for binary classifier thresholds")
    parser.add_argument('--attribute-indices',
                        dest='attribute_indices',
                        default=None,
                        type=str,
                        help="indices to select specific attribute vectors")
    parser.add_argument(
        "--balanced2",
        dest='balanced2',
        type=str,
        default=None,
        help="Balanced two attributes and generate atvec. eg: 20,31")
    parser.add_argument(
        "--balanced",
        dest='balanced',
        type=str,
        default=None,
        help="Balance attributes and generate atvec. eg: 20,21,31")
    parser.add_argument("--avg-diff",
                        dest='avg_diff',
                        type=str,
                        default=None,
                        help="Two lists of vectors to average and then diff")
    parser.add_argument('--outfile',
                        dest='outfile',
                        default=None,
                        help="Output json file for vectors.")
    args = parser.parse_args(args)

    # Mode 1: difference of the means of two vector lists.
    if args.avg_diff:
        vecs1, vecs2 = args.avg_diff.split(",")
        encoded1 = json_list_to_array(vecs1)
        encoded2 = json_list_to_array(vecs2)
        print("Taking the difference between {} and {} vectors".format(
            len(encoded1), len(encoded2)))
        m1 = np.mean(encoded1, axis=0)
        m2 = np.mean(encoded2, axis=0)
        diff_vector = m2 - m1
        z_dim, = diff_vector.shape
        # Stored as a single-row matrix so it has the same layout as a
        # list of attribute vectors.
        atvecs = diff_vector.reshape(1, z_dim)
        print("Computed diff shape: {}".format(atvecs.shape))
        if args.outfile is not None:
            save_json_attribs(atvecs, args.outfile)
        sys.exit(0)

    encoded = json_list_to_array(args.encoded_vectors)
    # Unpacking into two names also requires `encoded` to be 2-D.
    _, z_dim = encoded.shape
    if args.dataset:
        attribs = np.array(
            list(
                get_dataset_iterator(args.dataset,
                                     args.split,
                                     include_features=False,
                                     include_targets=True)))
    else:
        attribs = get_attribs_from_file(args.labels)
    print("encoded vectors: {}, attributes: {} ".format(
        encoded.shape, attribs.shape))

    # Mode 2: ROC evaluation of one selected attribute vector.
    if args.roc:
        atvecs = get_json_vectors(args.attribute_vectors)
        dim = len(atvecs[0])
        chosen_vector = offset_from_string(args.attribute_indices, atvecs, dim)
        if args.attribute_thresholds is not None:
            atvec_thresholds = get_json_vectors(args.attribute_thresholds)
            threshold = atvec_thresholds[0][int(args.attribute_indices)]
        else:
            threshold = None
        do_roc(chosen_vector, encoded, attribs, int(args.attribute_indices),
               threshold, args.outfile)
        sys.exit(0)

    # Mode 3: threshold computation for existing attribute vectors.
    if args.thresh:
        atvecs = get_json_vectors(args.attribute_vectors)
        do_thresh(atvecs, encoded, attribs, args.outfile)
        sys.exit(0)

    # Mode 4: build attribute vectors from averages.
    if args.balanced2:
        # Materialise as a list: the indexes are subscripted below, which
        # a lazy map iterator does not support.
        indexes = [int(s) for s in args.balanced2.split(",")]
        with_attr, without_attr = get_balanced_averages2(
            attribs, encoded, indexes[0], indexes[1])
        num_attribs = 2
    elif args.balanced:
        # A list is required here as well because len() is taken below.
        indexes = [int(s) for s in args.balanced.split(",")]
        with_attr, without_attr = get_balanced_averages(
            attribs, encoded, indexes)
        num_attribs = len(indexes)
    else:
        with_attr, without_attr = get_averages(attribs, encoded,
                                               args.num_attribs)
        num_attribs = args.num_attribs

    atvects = averages_to_attribute_vectors(with_attr, without_attr,
                                            num_attribs, z_dim)
    print("Computed atvecs shape: {}".format(atvects.shape))

    if args.outfile is not None:
        save_json_attribs(atvects, args.outfile)
Example #13
0
def atvec(parser, context, args):
    """Command-line entry point for building or evaluating attribute vectors.

    Registers its options on ``parser``, parses ``args`` and then runs
    exactly one mode:

    * ``--avg-diff A,B``: save mean(B) - mean(A) as a single attribute
      vector, then exit.
    * ``--svm-diff A,B``: fit a linear SVM separating A (False) from B
      (True), rescale its weight vector to the length of the mean
      difference, save it, then exit.
    * ``--roc``: run ``do_roc`` for the vector selected by
      ``--attribute-indices``, then exit.
    * ``--thresh``: run ``do_thresh`` on the vectors from
      ``--attribute-vectors``, then exit.
    * otherwise: compute with/without-attribute averages (balanced over
      two attributes, balanced over a list, or plain), convert them to
      attribute vectors (optionally with an SVM) and save them when
      ``--outfile`` is given.

    Encoded vectors come either from ``--encoded-vectors`` or from the pair
    ``--encoded-true`` / ``--encoded-false``; in the latter case the labels
    are synthesised (ones for the true set, zeros for the false set).

    Args:
        parser: argparse-style parser the options are added to.
        context: not used by this function.
        args: argument list handed to ``parser.parse_args``.

    Raises:
        SystemExit: with status 0 once the avg-diff, svm-diff, roc or
            thresh mode has finished (and in the final fallback branch);
            with status 1 when no label source is available.
    """
    parser.add_argument('--dataset',
                        dest='dataset',
                        default=None,
                        help="Source dataset (for labels).")
    # memo: --labels became --attributes when --classes was added
    parser.add_argument('--attributes',
                        dest='attributes',
                        default=None,
                        help="Text file with 0/1 labels.")
    parser.add_argument('--classes',
                        dest='classes',
                        default=None,
                        help="Text file with 0/1/2/.../num-classes-1 labels.")
    parser.add_argument(
        '--split',
        dest='split',
        default="train",
        help=
        "Which split to use from the dataset (train/nontrain/valid/test/any).")
    parser.add_argument("--num-attribs",
                        dest='num_attribs',
                        type=int,
                        default=40,
                        help="Number of attributes (labes)")
    parser.add_argument(
        "--which-attribs",
        type=str,
        default=None,
        help="optional comma separated list of attributes to run")
    parser.add_argument(
        "--num-classes",
        dest='num_classes',
        type=int,
        default=None,
        help="For multiclass, number of classes (assumed 0 .. n-1)")
    parser.add_argument("--z-dim",
                        dest='z_dim',
                        type=int,
                        default=100,
                        help="z dimension of vectors")
    parser.add_argument("--encoded-vectors",
                        type=str,
                        default=None,
                        help="Comma separated list of json arrays")
    parser.add_argument("--encoded-true",
                        type=str,
                        default=None,
                        help="Comma separated list of json arrays (true)")
    parser.add_argument("--encoded-false",
                        type=str,
                        default=None,
                        help="Comma separated list of json arrays (false)")
    parser.add_argument(
        '--thresh',
        dest='thresh',
        default=False,
        action='store_true',
        help="Compute thresholds for attribute vectors classifiers")
    parser.add_argument('--svm',
                        dest='svm',
                        default=False,
                        action='store_true',
                        help="Use SVM for computing attribute vectors")
    parser.add_argument("--limit",
                        dest='limit',
                        type=int,
                        default=None,
                        help="Limit number of inputs when computing atvecs")
    parser.add_argument('--roc',
                        dest='roc',
                        default=False,
                        action='store_true',
                        help="ROC curve of selected attribute vectors")
    parser.add_argument("--attribute-vectors",
                        dest='attribute_vectors',
                        default=None,
                        help="use json file as source of attribute vectors")
    parser.add_argument(
        "--attribute-thresholds",
        dest='attribute_thresholds',
        default=None,
        help="use these non-zero values for binary classifier thresholds")
    parser.add_argument("--attribute-set",
                        dest='attribute_set',
                        default="all",
                        help="score ROC/accuracy against true/false/all")
    parser.add_argument('--attribute-indices',
                        dest='attribute_indices',
                        default=None,
                        type=str,
                        help="indices to select specific attribute vectors")
    parser.add_argument(
        "--balanced2",
        dest='balanced2',
        type=str,
        default=None,
        help="Balanced two attributes and generate atvec. eg: 20,31")
    parser.add_argument(
        "--balanced",
        dest='balanced',
        type=str,
        default=None,
        help="Balance attributes and generate atvec. eg: 20,21,31")
    parser.add_argument("--avg-diff",
                        dest='avg_diff',
                        type=str,
                        default=None,
                        help="Two lists of vectors to average and then diff")
    parser.add_argument(
        "--svm-diff",
        dest='svm_diff',
        type=str,
        default=None,
        help="Two lists of vectors to average and then svm diff")
    parser.add_argument('--outfile',
                        dest='outfile',
                        default=None,
                        help="Output json file for vectors.")
    args = parser.parse_args(args)

    # Mode: difference of the means of two vector lists.
    if args.avg_diff:
        vecs1, vecs2 = args.avg_diff.split(",")
        encoded1 = json_list_to_array(vecs1)
        encoded2 = json_list_to_array(vecs2)
        print("Taking the difference between {} and {} vectors".format(
            len(encoded1), len(encoded2)))
        m1 = np.mean(encoded1, axis=0)
        m2 = np.mean(encoded2, axis=0)
        diff_vector = m2 - m1
        z_dim, = diff_vector.shape
        # Stored as a single-row matrix so it has the same layout as a
        # list of attribute vectors.
        atvecs = diff_vector.reshape(1, z_dim)
        print("Computed diff shape: {}".format(atvecs.shape))
        if args.outfile is not None:
            save_json_attribs(atvecs, args.outfile)
        sys.exit(0)

    # Mode: direction of a linear SVM separating two vector lists.
    if args.svm_diff:
        vecs1, vecs2 = args.svm_diff.split(",")
        encoded1 = json_list_to_array(vecs1)
        encoded2 = json_list_to_array(vecs2)
        print("Taking the svm difference between {} and {} vectors".format(
            len(encoded1), len(encoded2)))
        C = 1.0  # SVM regularization parameter
        # First list is labelled False, second list True.
        X_arr = []
        y_arr = []
        for l in range(len(encoded1)):
            X_arr.append(encoded1[l])
            y_arr.append(False)
        for l in range(len(encoded2)):
            X_arr.append(encoded2[l])
            y_arr.append(True)
        X = np.array(X_arr)
        y = np.array(y_arr)
        # svc = svm.LinearSVC(C=C, class_weight="balanced").fit(X, y)
        svc = svm.LinearSVC(C=C).fit(X, y)
        # get the separating hyperplane
        w = svc.coef_[0]

        #FIXME: this is a scaling hack.
        # The SVM weight vector is rescaled to the length of the mean
        # difference; only the norm of (m1 - m2) is used, so its sign
        # does not matter.
        m1 = np.mean(encoded1, axis=0)
        m2 = np.mean(encoded2, axis=0)
        mean_vector = m1 - m2
        mean_length = np.linalg.norm(mean_vector)
        svn_length = np.linalg.norm(w)

        diff_vector = (mean_length / svn_length) * w
        z_dim, = diff_vector.shape
        atvecs = diff_vector.reshape(1, z_dim)
        print("Computed svm diff shape: {}".format(atvecs.shape))
        if args.outfile is not None:
            save_json_attribs(atvecs, args.outfile)
        sys.exit(0)

    print("reading encoded vectors...")
    attribs = None
    if args.encoded_vectors is not None:
        if args.encoded_vectors.endswith("json"):
            encoded = json_list_to_array(args.encoded_vectors)
            print("Read json array: {}".format(encoded.shape))
        else:
            encoded = np.load(args.encoded_vectors)['arr_0']
            print("Read numpy array: {}".format(encoded.shape))
    else:
        if args.encoded_true.endswith("json"):
            encoded_true = json_list_to_array(args.encoded_true)
            print("Read true json array: {}".format(encoded_true.shape))
        else:
            encoded_true = np.load(args.encoded_true)['arr_0']
            print("Read true numpy array: {}".format(encoded_true.shape))
        if args.encoded_false.endswith("json"):
            encoded_false = json_list_to_array(args.encoded_false)
            print("Read false json array: {}".format(encoded_false.shape))
        else:
            encoded_false = np.load(args.encoded_false)['arr_0']
            print("Read false numpy array: {}".format(encoded_false.shape))
        encoded = np.concatenate((encoded_true, encoded_false), axis=0)
        num_true = len(encoded_true)
        num_false = len(encoded_false)
        # Synthesise labels: 1 for every "true" vector, 0 for every "false"
        # one.  Builtin int is used as the dtype because the np.int alias
        # no longer exists in current NumPy releases.
        true_values = np.ones(shape=[num_true, 1, 1], dtype=int)
        false_values = np.zeros(shape=[num_false, 1, 1], dtype=int)
        attribs = np.concatenate((true_values, false_values), axis=0)

    if args.limit is not None:
        encoded = encoded[:args.limit]
    # Unpacking into two names requires `encoded` to be 2-D.
    num_rows, z_dim = encoded.shape
    if attribs is None:
        print("reading attributes...")
        if args.dataset:
            attribs = np.array(
                list(
                    get_dataset_iterator(args.dataset,
                                         args.split,
                                         include_features=False,
                                         include_targets=True)))
            print("Read attributes from dataset: {}".format(attribs.shape))
        elif args.attributes is not None:
            print("Read attributes from file: {}".format(args.attributes))
            attribs = get_attribs_from_files(args.attributes)
        elif args.classes is not None:
            print("Read attributes from file: {}".format(args.classes))
            attribs = get_attribs_from_class_file(args.classes,
                                                  args.num_classes)
        else:
            print(
                "Don't know how to get labels: try --attributes or --classes")
            sys.exit(1)

    if args.which_attribs is not None:
        attribs = filter_attributes(attribs, args.which_attribs)
    print("encoded vectors: {}, attributes: {} ".format(
        encoded.shape, attribs.shape))

    # Mode: ROC evaluation of one selected attribute vector.
    if args.roc:
        atvecs = get_json_vectors(args.attribute_vectors)
        dim = len(atvecs[0])
        chosen_vector = offset_from_string(args.attribute_indices, atvecs, dim)
        if args.attribute_thresholds is not None:
            atvec_thresholds = get_json_vectors(args.attribute_thresholds)
            threshold = atvec_thresholds[0][int(args.attribute_indices)]
        else:
            threshold = None
        # isclass is fixed to False here; a variant deriving it from
        # args.num_classes was previously disabled.
        do_roc(chosen_vector,
               encoded,
               attribs,
               int(args.attribute_indices),
               threshold,
               args.attribute_set,
               args.outfile,
               isclass=False)
        sys.exit(0)

    # Mode: threshold computation for existing attribute vectors.
    if args.thresh:
        atvecs = get_json_vectors(args.attribute_vectors)
        do_thresh(atvecs,
                  encoded,
                  attribs,
                  args.outfile,
                  isclass=(args.num_classes is not None))
        sys.exit(0)

    # Mode: build attribute vectors from averages.
    if args.balanced2:
        # Materialise as a list: the indexes are subscripted below, which
        # a lazy map iterator does not support.
        indexes = [int(s) for s in args.balanced2.split(",")]
        with_attr, without_attr = get_balanced_averages2(
            attribs, encoded, indexes[0], indexes[1])
        num_attribs = 2
    elif args.balanced:
        # A list is required here as well because len() is taken below.
        indexes = [int(s) for s in args.balanced.split(",")]
        with_attr, without_attr = get_balanced_averages(
            attribs, encoded, indexes)
        num_attribs = len(indexes)
    # I can't remember why
    # elif args.num_classes is not None:
    #     with_attr, without_attr = get_class_averages(attribs, encoded, args.num_classes);
    #     num_attribs = args.num_classes
    elif args.num_attribs is not None:
        with_attr, without_attr = get_averages(attribs, encoded)
        num_attribs = args.num_attribs
    else:
        # NOTE(review): this reports a problem but exits with status 0.
        print("I think we need either num_classes or num_attribs or something")
        sys.exit(0)

    if args.svm:
        atvects = averages_to_svm_attribute_vectors(with_attr, without_attr)
    else:
        atvects = averages_to_attribute_vectors(with_attr, without_attr)
    print("Computed atvecs shape: {}".format(atvects.shape))

    if args.outfile is not None:
        save_json_attribs(atvects, args.outfile)
Example #14
0
def get_global_offset(offsets, indices_str, scale):
    """Return the offset vector selected by ``indices_str``, multiplied by ``scale``.

    The vector dimension is taken from the length of ``offsets[0]``.
    """
    combined = offset_from_string(indices_str, offsets, len(offsets[0]))
    return scale * combined
Example #15
0
def _read_encoded_array(path, label=""):
    """Load encoded vectors from ``path`` and print the resulting shape.

    Paths ending in "json" are read with ``json_list_to_array``; anything
    else is opened with ``np.load`` and its ``'arr_0'`` entry is used.
    ``label`` (e.g. "true ") is inserted into the progress message.
    """
    if path.endswith("json"):
        encoded = json_list_to_array(path)
        print("Read {}json array: {}".format(label, encoded.shape))
    else:
        encoded = np.load(path)['arr_0']
        print("Read {}numpy array: {}".format(label, encoded.shape))
    return encoded


def atvec(parser, context, args):
    """Command entry point: build, threshold or score attribute vectors.

    Registers its command line options on ``parser``, parses ``args`` and
    then runs one of the following modes:

    * ``--avg-diff A,B``: mean of json list B minus mean of json list A;
      exits afterwards.
    * ``--svm-diff A,B``: coefficient vector of a linear SVM fitted on the
      two lists, rescaled to the length of the mean difference; exits
      afterwards.
    * ``--roc``: calls ``do_roc`` for one selected attribute vector; exits.
    * ``--thresh``: calls ``do_thresh`` for the attribute vectors; exits.
    * otherwise: attribute vectors computed from encoded vectors and labels
      (``--balanced2`` / ``--balanced`` / plain averages, optionally
      ``--svm``).

    Results are written with ``save_json_attribs`` when ``--outfile`` is set.

    Args:
        parser: argparse-style parser; options are added to it and
            ``parser.parse_args(args)`` is called.
        context: not used by this function.
        args: list of command line argument strings.

    Raises:
        SystemExit: after each early-exit mode (status 0), and with status 1
            when the encoded vectors or the labels cannot be located.
    """
    parser.add_argument('--dataset', dest='dataset', default=None,
                        help="Source dataset (for labels).")
    # memo: --labels became --attributes when --classes was added
    parser.add_argument('--attributes', dest='attributes', default=None,
                        help="Text file with 0/1 labels.")
    parser.add_argument('--classes', dest='classes', default=None,
                        help="Text file with 0/1/2/.../num-classes-1 labels.")
    parser.add_argument('--split', dest='split', default="train",
                        help="Which split to use from the dataset (train/nontrain/valid/test/any).")
    parser.add_argument("--num-attribs", dest='num_attribs', type=int, default=40,
                        help="Number of attributes (labes)")
    parser.add_argument("--which-attribs", type=str, default=None,
                        help="optional comma separated list of attributes to run")
    parser.add_argument("--num-classes", dest='num_classes', type=int, default=None,
                        help="For multiclass, number of classes (assumed 0 .. n-1)")
    parser.add_argument("--z-dim", dest='z_dim', type=int, default=100,
                        help="z dimension of vectors")
    parser.add_argument("--encoded-vectors", type=str, default=None,
                        help="Comma separated list of json arrays")
    parser.add_argument("--encoded-true", type=str, default=None,
                        help="Comma separated list of json arrays (true)")
    parser.add_argument("--encoded-false", type=str, default=None,
                        help="Comma separated list of json arrays (false)")
    parser.add_argument('--thresh', dest='thresh', default=False, action='store_true',
                        help="Compute thresholds for attribute vectors classifiers")
    parser.add_argument('--svm', dest='svm', default=False, action='store_true',
                        help="Use SVM for computing attribute vectors")
    parser.add_argument("--limit", dest='limit', type=int, default=None,
                        help="Limit number of inputs when computing atvecs")
    parser.add_argument('--roc', dest='roc', default=False, action='store_true',
                        help="ROC curve of selected attribute vectors")
    parser.add_argument("--attribute-vectors", dest='attribute_vectors', default=None,
                        help="use json file as source of attribute vectors")
    parser.add_argument("--attribute-thresholds", dest='attribute_thresholds', default=None,
                        help="use these non-zero values for binary classifier thresholds")
    parser.add_argument("--attribute-set", dest='attribute_set', default="all",
                        help="score ROC/accuracy against true/false/all")
    parser.add_argument('--attribute-indices', dest='attribute_indices', default=None, type=str,
                        help="indices to select specific attribute vectors")
    parser.add_argument("--balanced2", dest='balanced2', type=str, default=None,
                        help="Balanced two attributes and generate atvec. eg: 20,31")
    parser.add_argument("--balanced", dest='balanced', type=str, default=None,
                        help="Balance attributes and generate atvec. eg: 20,21,31")
    parser.add_argument("--avg-diff", dest='avg_diff', type=str, default=None,
                        help="Two lists of vectors to average and then diff")
    parser.add_argument("--svm-diff", dest='svm_diff', type=str, default=None,
                        help="Two lists of vectors to average and then svm diff")
    parser.add_argument('--outfile', dest='outfile', default=None,
                        help="Output json file for vectors.")
    args = parser.parse_args(args)

    if args.avg_diff:
        vecs1, vecs2 = args.avg_diff.split(",")
        encoded1 = json_list_to_array(vecs1)
        encoded2 = json_list_to_array(vecs2)
        print("Taking the difference between {} and {} vectors".format(len(encoded1), len(encoded2)))
        m1 = np.mean(encoded1, axis=0)
        m2 = np.mean(encoded2, axis=0)
        # Named diff_vec (not atvec) so the enclosing function is not shadowed.
        diff_vec = m2 - m1
        z_dim, = diff_vec.shape
        atvecs = diff_vec.reshape(1, z_dim)
        print("Computed diff shape: {}".format(atvecs.shape))
        if args.outfile is not None:
            save_json_attribs(atvecs, args.outfile)
        sys.exit(0)

    if args.svm_diff:
        vecs1, vecs2 = args.svm_diff.split(",")
        encoded1 = json_list_to_array(vecs1)
        encoded2 = json_list_to_array(vecs2)
        print("Taking the svm difference between {} and {} vectors".format(len(encoded1), len(encoded2)))
        C = 1.0  # SVM regularization parameter
        # Rows of the first list are labelled False, rows of the second True.
        X = np.array(list(encoded1) + list(encoded2))
        y = np.array([False] * len(encoded1) + [True] * len(encoded2))
        # svc = svm.LinearSVC(C=C, class_weight="balanced").fit(X, y)
        svc = svm.LinearSVC(C=C).fit(X, y)
        # get the separating hyperplane
        w = svc.coef_[0]

        # FIXME: this is a scaling hack: w is rescaled so that its length
        # equals the length of the difference between the two means.
        m1 = np.mean(encoded1, axis=0)
        m2 = np.mean(encoded2, axis=0)
        mean_length = np.linalg.norm(m1 - m2)
        svm_length = np.linalg.norm(w)

        svm_vec = (mean_length / svm_length) * w
        z_dim, = svm_vec.shape
        atvecs = svm_vec.reshape(1, z_dim)
        print("Computed svm diff shape: {}".format(atvecs.shape))
        if args.outfile is not None:
            save_json_attribs(atvecs, args.outfile)
        sys.exit(0)

    print("reading encoded vectors...")
    attribs = None
    if args.encoded_vectors is not None:
        encoded = _read_encoded_array(args.encoded_vectors)
    elif args.encoded_true is not None and args.encoded_false is not None:
        encoded_true = _read_encoded_array(args.encoded_true, "true ")
        encoded_false = _read_encoded_array(args.encoded_false, "false ")
        encoded = np.concatenate((encoded_true, encoded_false), axis=0)
        # Labels are synthesized: 1 for every "true" row, 0 for every "false"
        # row.  Builtin int replaces the np.int alias removed from NumPy.
        true_values = np.ones(shape=[len(encoded_true), 1, 1], dtype=int)
        false_values = np.zeros(shape=[len(encoded_false), 1, 1], dtype=int)
        attribs = np.concatenate((true_values, false_values), axis=0)
    else:
        # Without this guard the code below would fail on None.endswith().
        print("Don't know how to get encoded vectors: try --encoded-vectors or --encoded-true with --encoded-false")
        sys.exit(1)

    if args.limit is not None:
        encoded = encoded[:args.limit]
    # Unpacking fails unless encoded is two dimensional; kept as a shape check.
    num_rows, z_dim = encoded.shape
    if attribs is None:
        print("reading attributes...")
        if args.dataset:
            attribs = np.array(list(get_dataset_iterator(args.dataset, args.split, include_features=False, include_targets=True)))
            print("Read attributes from dataset: {}".format(attribs.shape))
        elif args.attributes is not None:
            print("Read attributes from file: {}".format(args.attributes))
            attribs = get_attribs_from_files(args.attributes)
        elif args.classes is not None:
            print("Read attributes from file: {}".format(args.classes))
            attribs = get_attribs_from_class_file(args.classes, args.num_classes)
        else:
            print("Don't know how to get labels: try --attributes or --classes")
            sys.exit(1)

    if args.which_attribs is not None:
        attribs = filter_attributes(attribs, args.which_attribs)
    print("encoded vectors: {}, attributes: {} ".format(encoded.shape, attribs.shape))

    if args.roc:
        atvecs = get_json_vectors(args.attribute_vectors)
        dim = len(atvecs[0])
        chosen_vector = offset_from_string(args.attribute_indices, atvecs, dim)
        if args.attribute_thresholds is not None:
            atvec_thresholds = get_json_vectors(args.attribute_thresholds)
            threshold = atvec_thresholds[0][int(args.attribute_indices)]
        else:
            threshold = None
        do_roc(chosen_vector, encoded, attribs, int(args.attribute_indices), threshold, args.attribute_set, args.outfile, isclass=False)
        # do_roc(chosen_vector, encoded, attribs, int(args.attribute_indices), threshold, args.attribute_set, args.outfile, isclass=(args.num_classes is not None))
        sys.exit(0)

    if args.thresh:
        atvecs = get_json_vectors(args.attribute_vectors)
        do_thresh(atvecs, encoded, attribs, args.outfile, isclass=(args.num_classes is not None))
        sys.exit(0)

    if args.balanced2:
        # A real list is required: map() returns a non-indexable iterator on
        # Python 3.
        indexes = [int(index) for index in args.balanced2.split(",")]
        with_attr, without_attr = get_balanced_averages2(attribs, encoded, indexes[0], indexes[1])
    elif args.balanced:
        indexes = [int(index) for index in args.balanced.split(",")]
        with_attr, without_attr = get_balanced_averages(attribs, encoded, indexes)
    # I can't remember why
    # elif args.num_classes is not None:
    #     with_attr, without_attr = get_class_averages(attribs, encoded, args.num_classes);
    #     num_attribs = args.num_classes
    elif args.num_attribs is not None:
        with_attr, without_attr = get_averages(attribs, encoded)
    else:
        print("I think we need either num_classes or num_attribs or something")
        # Error path: report failure through a non-zero exit status.
        sys.exit(1)

    if args.svm:
        atvects = averages_to_svm_attribute_vectors(with_attr, without_attr)
    else:
        atvects = averages_to_attribute_vectors(with_attr, without_attr)
    print("Computed atvecs shape: {}".format(atvects.shape))

    if args.outfile is not None:
        save_json_attribs(atvects, args.outfile)