def anchors_noise_offsets(anchors, offsets, rows, cols, spacing, z_step,
                          x_indices_str, y_indices_str, x_minscale,
                          y_minscale, x_maxscale, y_maxscale):
    """Displace a grid of anchors along two offset directions using noise.

    For every grid position (row-major order) two ``pnoise3`` samples are
    taken at ``(x_frac, y_frac, z_step)``; the second sample is shifted by
    100 in x and y.  Each sample is remapped with ``0.5 * (1 + value)`` and
    passed to ``lerp`` together with the matching min/max scale.  The two
    resulting scales weight the x and y offset vectors that are added to the
    anchor.

    Args:
        anchors: sequence of anchor vectors.  If it holds exactly one
            anchor, that anchor is reused for every grid position;
            otherwise anchors are consumed one per grid position, in order.
        offsets: offset vectors handed to ``offset_from_string``.
        rows, cols: grid size before subsampling by ``spacing``.
        spacing: subsampling step; the anchor grid has ``ceil(rows/spacing)``
            by ``ceil(cols/spacing)`` entries.
        z_step: third noise coordinate.
        x_indices_str, y_indices_str: index strings handed to
            ``offset_from_string`` to build the x and y offset vectors.
        x_minscale, x_maxscale, y_minscale, y_maxscale: bounds passed to
            ``lerp`` for the x and y scales.

    Returns:
        ``np.array`` with one displaced anchor per grid position.
    """
    from noise import pnoise3
    from plat.interpolate import lerp

    only_anchor = anchors[0] if len(anchors) == 1 else None
    dim = len(anchors[0])
    x_offset = offset_from_string(x_indices_str, offsets, dim)
    y_offset = offset_from_string(y_indices_str, offsets, dim)

    # Ceiling division.  Floor-division keeps these integers on Python 3,
    # where "/" would yield floats and make range() raise TypeError.
    num_row_anchors = (rows + spacing - 1) // spacing
    num_col_anchors = (cols + spacing - 1) // spacing

    newanchors = []
    cur_anchor_index = 0
    for j in range(num_row_anchors):
        y_frac = float(j) / num_row_anchors
        for i in range(num_col_anchors):
            if only_anchor is None:
                cur_anchor = anchors[cur_anchor_index]
                cur_anchor_index += 1
            else:
                cur_anchor = only_anchor
            x_frac = float(i) / num_col_anchors
            n1 = 0.5 * (1.0 + pnoise3(x_frac, y_frac, z_step,
                                      octaves=4, repeatz=2))
            n2 = 0.5 * (1.0 + pnoise3(100 + x_frac, 100 + y_frac, z_step,
                                      octaves=4, repeatz=2))
            x_scale = lerp(n1, x_minscale, x_maxscale)
            y_scale = lerp(n2, y_minscale, y_maxscale)
            newanchors.append(cur_anchor + x_scale * x_offset +
                              y_scale * y_offset)
    return np.array(newanchors)
def check_lazy_initialize(args, dmodel, classifier, vector_offsets):
    """Load the model and the offset vectors on first use.

    Args:
        args: parsed options; reads ``model``, ``anchor_offset`` and
            ``anchor_indexes``.
        dmodel: already-loaded model, or None to load it from ``args.model``.
        classifier: passed through untouched (no classifier loading is
            performed here).
        vector_offsets: already-built offsets, or None to build them from
            ``args.anchor_offset``.

    Returns:
        Tuple ``(dmodel, classifier, vector_offsets)``.
    """
    model_missing = dmodel is None
    if model_missing and args.model is not None:
        print('Loading saved model...')
        dmodel = DiscGenModel(filename=args.model)

    offsets_missing = vector_offsets is None
    if offsets_missing and args.anchor_offset is not None:
        offsets = vectors_from_json_filelist(real_glob(args.anchor_offset))
        dim = len(offsets[0])
        index_strings = args.anchor_indexes.split(",")
        # The first offset is negated; the remaining ones are used as-is.
        first_offset = offset_from_string(index_strings[0], offsets, dim)
        vector_offsets = [-1 * first_offset]
        for index_string in index_strings[1:]:
            vector_offsets.append(
                offset_from_string(index_string, offsets, dim))

    return dmodel, classifier, vector_offsets
def anchors_json_offsets(anchors, offsets, rows, cols, spacing, z_step,
                         x_indices_str, y_indices_str, x_minscale,
                         y_minscale, x_maxscale, y_maxscale, range_data):
    """Displace a grid of anchors using scale parameters read from a table.

    ``range_data[z_step][0]`` and ``range_data[z_step][1]`` are passed to
    ``lerp`` with the x and y min/max scales respectively; the resulting
    scales weight the x and y offset vectors added to every anchor.  The
    same pair of scales is applied to the whole grid.

    Args:
        anchors: sequence of anchor vectors.  If it holds exactly one
            anchor, that anchor is reused for every grid position;
            otherwise anchors are consumed one per grid position, in order.
        offsets: offset vectors handed to ``offset_from_string``.
        rows, cols: grid size before subsampling by ``spacing``.
        spacing: subsampling step; the anchor grid has ``ceil(rows/spacing)``
            by ``ceil(cols/spacing)`` entries.
        z_step: index into ``range_data``.
        x_indices_str, y_indices_str: index strings handed to
            ``offset_from_string`` to build the x and y offset vectors.
        x_minscale, x_maxscale, y_minscale, y_maxscale: bounds passed to
            ``lerp`` for the x and y scales.
        range_data: indexable; entry ``z_step`` must provide at least two
            values.

    Returns:
        ``np.array`` with one displaced anchor per grid position.
    """
    only_anchor = anchors[0] if len(anchors) == 1 else None
    dim = len(anchors[0])
    x_offset = offset_from_string(x_indices_str, offsets, dim)
    y_offset = offset_from_string(y_indices_str, offsets, dim)

    # Ceiling division.  Floor-division keeps these integers on Python 3,
    # where "/" would yield floats and make range() raise TypeError.
    num_row_anchors = (rows + spacing - 1) // spacing
    num_col_anchors = (cols + spacing - 1) // spacing

    # The scales depend only on z_step, so compute them once rather than
    # once per grid cell.
    x_scale = lerp(range_data[z_step][0], x_minscale, x_maxscale)
    y_scale = lerp(range_data[z_step][1], y_minscale, y_maxscale)

    newanchors = []
    cur_anchor_index = 0
    for _ in range(num_row_anchors * num_col_anchors):
        if only_anchor is None:
            cur_anchor = anchors[cur_anchor_index]
            cur_anchor_index += 1
        else:
            cur_anchor = only_anchor
        newanchors.append(cur_anchor + x_scale * x_offset +
                          y_scale * y_offset)
    return np.array(newanchors)
def apply_anchor_offsets(anchor, offsets, a, b, a_indices_str, b_indices_str):
    """Shift ``anchor`` along two offset vectors by remapped weights.

    ``a`` and ``b`` are each remapped with ``2 * (value - 0.5)`` before
    being used as weights, so 0.5 means "no shift", 0 gives weight -1 and
    1 gives weight +1.

    Args:
        anchor: vector to displace; its length is the dimension handed to
            ``offset_from_string``.
        offsets: offset vectors handed to ``offset_from_string``.
        a, b: weights before remapping.
        a_indices_str, b_indices_str: index strings selecting the two
            offset vectors.

    Returns:
        ``anchor + weight_a * a_offset + weight_b * b_offset``.
    """
    weight_a = 2.0 * (a - 0.5)
    weight_b = 2.0 * (b - 0.5)
    vector_len = len(anchor)
    a_offset = offset_from_string(a_indices_str, offsets, vector_len)
    b_offset = offset_from_string(b_indices_str, offsets, vector_len)
    return anchor + weight_a * a_offset + weight_b * b_offset
def anchors_from_offsets(anchor, offsets, x_indices_str, y_indices_str,
                         x_minscale, y_minscale, x_maxscale, y_maxscale):
    """Build the four corner anchors spanned by the min/max x and y scales.

    Args:
        anchor: base vector; its length is the dimension handed to
            ``offset_from_string``.
        offsets: offset vectors handed to ``offset_from_string``.
        x_indices_str, y_indices_str: index strings selecting the x and y
            offset vectors.
        x_minscale, x_maxscale, y_minscale, y_maxscale: scale extremes.

    Returns:
        ``np.array`` of four anchors, ordered
        (x_min, y_min), (x_min, y_max), (x_max, y_min), (x_max, y_max).
    """
    vector_len = len(anchor)
    x_offset = offset_from_string(x_indices_str, offsets, vector_len)
    y_offset = offset_from_string(y_indices_str, offsets, vector_len)
    corners = [
        anchor + x_scale * x_offset + y_scale * y_offset
        for x_scale in (x_minscale, x_maxscale)
        for y_scale in (y_minscale, y_maxscale)
    ]
    return np.array(corners)
def check_lazy_initialize(args, dmodel, smile_offsets):
    """Load the model and the combined offset vector on first use.

    Args:
        args: parsed options; reads ``model``, ``model_file``,
            ``model_type``, ``anchor_offset`` and ``anchor_indexes``.
        dmodel: already-loaded model, or None to load it through
            ``zoo.load_model``.
        smile_offsets: already-built offsets, or None to build them from
            ``args.anchor_offset``.

    Returns:
        Tuple ``(dmodel, smile_offsets)``.  When built here,
        ``smile_offsets`` is a one-element list holding the sum of the
        offsets selected by each comma-separated entry of
        ``args.anchor_indexes``.
    """
    if dmodel is None and not (args.model is None
                               and args.model_file is None):
        print('Finding saved model...')
        dmodel = zoo.load_model(args.model, args.model_file, args.model_type)

    if smile_offsets is None and args.anchor_offset is not None:
        offsets = get_json_vectors(args.anchor_offset)
        dim = len(offsets[0])
        index_strings = args.anchor_indexes.split(",")
        combined = offset_from_string(index_strings[0], offsets, dim)
        for index_string in index_strings[1:]:
            combined += offset_from_string(index_string, offsets, dim)
        smile_offsets = [combined]

    return dmodel, smile_offsets
def anchors_noise_offsets(anchors, offsets, rows, cols, spacing, z_step,
                          x_indices_str, y_indices_str, x_minscale,
                          y_minscale, x_maxscale, y_maxscale):
    """Displace a grid of anchors along two offset directions using noise.

    For every grid position (row-major order) two ``pnoise3`` samples are
    taken at ``(x_frac, y_frac, z_step)``; the second sample is shifted by
    100 in x and y.  Each sample is remapped with ``0.5 * (1 + value)`` and
    passed to ``lerp`` together with the matching min/max scale.  The two
    resulting scales weight the x and y offset vectors that are added to the
    anchor.

    Args:
        anchors: sequence of anchor vectors.  If it holds exactly one
            anchor, that anchor is reused for every grid position;
            otherwise anchors are consumed one per grid position, in order.
        offsets: offset vectors handed to ``offset_from_string``.
        rows, cols: grid size before subsampling by ``spacing``.
        spacing: subsampling step; the anchor grid has ``ceil(rows/spacing)``
            by ``ceil(cols/spacing)`` entries.
        z_step: third noise coordinate.
        x_indices_str, y_indices_str: index strings handed to
            ``offset_from_string`` to build the x and y offset vectors.
        x_minscale, x_maxscale, y_minscale, y_maxscale: bounds passed to
            ``lerp`` for the x and y scales.

    Returns:
        ``np.array`` with one displaced anchor per grid position.
    """
    from noise import pnoise3
    from plat.interpolate import lerp

    only_anchor = anchors[0] if len(anchors) == 1 else None
    dim = len(anchors[0])
    x_offset = offset_from_string(x_indices_str, offsets, dim)
    y_offset = offset_from_string(y_indices_str, offsets, dim)

    # Ceiling division.  Floor-division keeps these integers on Python 3,
    # where "/" would yield floats and make range() raise TypeError.
    num_row_anchors = (rows + spacing - 1) // spacing
    num_col_anchors = (cols + spacing - 1) // spacing

    newanchors = []
    cur_anchor_index = 0
    for j in range(num_row_anchors):
        y_frac = float(j) / num_row_anchors
        for i in range(num_col_anchors):
            if only_anchor is None:
                cur_anchor = anchors[cur_anchor_index]
                cur_anchor_index += 1
            else:
                cur_anchor = only_anchor
            x_frac = float(i) / num_col_anchors
            n1 = 0.5 * (1.0 + pnoise3(x_frac, y_frac, z_step,
                                      octaves=4, repeatz=2))
            n2 = 0.5 * (1.0 + pnoise3(100 + x_frac, 100 + y_frac, z_step,
                                      octaves=4, repeatz=2))
            x_scale = lerp(n1, x_minscale, x_maxscale)
            y_scale = lerp(n2, y_minscale, y_maxscale)
            newanchors.append(cur_anchor + x_scale * x_offset +
                              y_scale * y_offset)
    return np.array(newanchors)
def anchors_wave_offsets(anchors, offsets, rows, cols, spacing, radial_wave,
                         clip_wave, z_step, x_indices_str, x_minscale,
                         x_maxscale):
    """Displace a grid of anchors along one offset direction with a wave.

    For every grid position a fraction ``x_frac`` is computed, either from
    the column index (``i / num_col_anchors``) or, in radial mode, from the
    distance to the grid center (1 at the center, 0 at the corner
    ``[0, 0]``).  ``z_step + x_frac`` is fed to ``compute_wave`` and the
    result is passed to ``lerp`` with the min/max scale to weight the offset
    vector added to the anchor.

    Args:
        anchors: sequence of anchor vectors.  If it holds exactly one
            anchor, that anchor is reused for every grid position;
            otherwise anchors are consumed one per grid position, in order.
        offsets: offset vectors handed to ``offset_from_string``.
        rows, cols: grid size before subsampling by ``spacing``.
        spacing: subsampling step; the anchor grid has ``ceil(rows/spacing)``
            by ``ceil(cols/spacing)`` entries.
        radial_wave: truthy to measure ``x_frac`` radially from the center.
        clip_wave: forwarded to ``compute_wave``.
        z_step: phase added to ``x_frac``.
        x_indices_str: index string handed to ``offset_from_string``.
        x_minscale, x_maxscale: bounds passed to ``lerp``.

    Returns:
        ``np.array`` with one displaced anchor per grid position.
    """
    only_anchor = anchors[0] if len(anchors) == 1 else None
    dim = len(anchors[0])
    x_offset = offset_from_string(x_indices_str, offsets, dim)

    # Ceiling division.  Floor-division keeps these integers on Python 3,
    # where "/" would yield floats and make range() raise TypeError.
    num_row_anchors = (rows + spacing - 1) // spacing
    num_col_anchors = (cols + spacing - 1) // spacing

    center_pt = [(num_col_anchors - 1) / 2.0, (num_row_anchors - 1) / 2.0]
    max_dist = distance_2d([0, 0], center_pt)

    newanchors = []
    cur_anchor_index = 0
    for j in range(num_row_anchors):
        for i in range(num_col_anchors):
            if only_anchor is None:
                cur_anchor = anchors[cur_anchor_index]
                cur_anchor_index += 1
            else:
                cur_anchor = only_anchor

            if radial_wave:
                cur_dist = distance_2d([i, j], center_pt)
                if max_dist:
                    x_frac = (max_dist - cur_dist) / max_dist
                else:
                    # 1x1 grid: the only cell is the center, whose fraction
                    # is 1.0; avoids dividing by a zero max_dist.
                    x_frac = 1.0
            else:
                x_frac = float(i) / num_col_anchors

            wave_val = z_step + x_frac
            n1 = compute_wave(wave_val, clip_wave)
            x_scale = lerp(n1, x_minscale, x_maxscale)
            newanchors.append(cur_anchor + x_scale * x_offset)
    return np.array(newanchors)
def get_global_offset(offsets, indices_str, scale):
    """Return the offset selected by ``indices_str`` multiplied by ``scale``.

    Args:
        offsets: offset vectors; the length of the first one is the
            dimension handed to ``offset_from_string``.
        indices_str: index string handed to ``offset_from_string``.
        scale: multiplier applied to the selected offset.

    Returns:
        ``scale * offset_from_string(indices_str, offsets, dim)``.
    """
    vector_len = len(offsets[0])
    return scale * offset_from_string(indices_str, offsets, vector_len)
def atvec(parser, context, args):
    """Command entry point: build or evaluate attribute vectors.

    Registers the command-line options on ``parser``, parses ``args`` and
    then runs exactly one of these modes (first match wins):

    * ``--avg-diff A,B``: save the difference of the mean vectors of the two
      json lists, then exit.
    * ``--roc``: run ``do_roc`` for the attribute vector selected by
      ``--attribute-indices``, then exit.
    * ``--thresh``: run ``do_thresh`` on the given attribute vectors, then
      exit.
    * otherwise: compute attribute vectors from per-attribute averages
      (``--balanced2``, ``--balanced`` or plain averages) and save them.

    Args:
        parser: argparse-style parser that the options are added to.
        context: not used by this function.
        args: list of argument strings handed to ``parser.parse_args``.

    Returns:
        None.  Several modes terminate the process through ``sys.exit(0)``.
    """
    parser.add_argument('--dataset', dest='dataset', default=None,
                        help="Source dataset (for labels).")
    parser.add_argument('--labels', dest='labels', default=None,
                        help="Text file with 0/1 labels.")
    parser.add_argument(
        '--split', dest='split', default="train",
        help="Which split to use from the dataset (train/nontrain/valid/test/any).")
    parser.add_argument("--num-attribs", dest='num_attribs', type=int,
                        default=40, help="Number of attributes (labes)")
    parser.add_argument("--z-dim", dest='z_dim', type=int, default=100,
                        help="z dimension of vectors")
    parser.add_argument("--encoded-vectors", type=str, default=None,
                        help="Comma separated list of json arrays")
    parser.add_argument(
        '--thresh', dest='thresh', default=False, action='store_true',
        help="Compute thresholds for attribute vectors classifiers")
    parser.add_argument('--roc', dest='roc', default=False,
                        action='store_true',
                        help="ROC curve of selected attribute vectors")
    parser.add_argument("--attribute-vectors", dest='attribute_vectors',
                        default=None,
                        help="use json file as source of attribute vectors")
    parser.add_argument(
        "--attribute-thresholds", dest='attribute_thresholds', default=None,
        help="use these non-zero values for binary classifier thresholds")
    parser.add_argument('--attribute-indices', dest='attribute_indices',
                        default=None, type=str,
                        help="indices to select specific attribute vectors")
    parser.add_argument(
        "--balanced2", dest='balanced2', type=str, default=None,
        help="Balanced two attributes and generate atvec. eg: 20,31")
    parser.add_argument(
        "--balanced", dest='balanced', type=str, default=None,
        help="Balance attributes and generate atvec. eg: 20,21,31")
    parser.add_argument("--avg-diff", dest='avg_diff', type=str, default=None,
                        help="Two lists of vectors to average and then diff")
    parser.add_argument('--outfile', dest='outfile', default=None,
                        help="Output json file for vectors.")
    args = parser.parse_args(args)

    if args.avg_diff:
        vecs1, vecs2 = args.avg_diff.split(",")
        encoded1 = json_list_to_array(vecs1)
        encoded2 = json_list_to_array(vecs2)
        print("Taking the difference between {} and {} vectors".format(
            len(encoded1), len(encoded2)))
        m1 = np.mean(encoded1, axis=0)
        m2 = np.mean(encoded2, axis=0)
        # Named diff_vector so the local does not shadow this function.
        diff_vector = m2 - m1
        z_dim, = diff_vector.shape
        atvecs = diff_vector.reshape(1, z_dim)
        print("Computed diff shape: {}".format(atvecs.shape))
        if args.outfile is not None:
            save_json_attribs(atvecs, args.outfile)
        sys.exit(0)

    encoded = json_list_to_array(args.encoded_vectors)
    # Unpacking also enforces that the encoded array is 2-D.
    _, z_dim = encoded.shape

    if args.dataset:
        attribs = np.array(
            list(
                get_dataset_iterator(args.dataset, args.split,
                                     include_features=False,
                                     include_targets=True)))
    else:
        attribs = get_attribs_from_file(args.labels)
    print("encoded vectors: {}, attributes: {} ".format(
        encoded.shape, attribs.shape))

    if args.roc:
        atvecs = get_json_vectors(args.attribute_vectors)
        dim = len(atvecs[0])
        chosen_vector = offset_from_string(args.attribute_indices, atvecs,
                                           dim)
        if args.attribute_thresholds is not None:
            atvec_thresholds = get_json_vectors(args.attribute_thresholds)
            threshold = atvec_thresholds[0][int(args.attribute_indices)]
        else:
            threshold = None
        do_roc(chosen_vector, encoded, attribs, int(args.attribute_indices),
               threshold, args.outfile)
        sys.exit(0)

    if args.thresh:
        atvecs = get_json_vectors(args.attribute_vectors)
        do_thresh(atvecs, encoded, attribs, args.outfile)
        sys.exit(0)

    if args.balanced2:
        # A real list is required: on Python 3 a map object supports
        # neither indexing nor len().
        indexes = [int(s) for s in args.balanced2.split(",")]
        with_attr, without_attr = get_balanced_averages2(
            attribs, encoded, indexes[0], indexes[1])
        num_attribs = 2
    elif args.balanced:
        indexes = [int(s) for s in args.balanced.split(",")]
        with_attr, without_attr = get_balanced_averages(
            attribs, encoded, indexes)
        num_attribs = len(indexes)
    else:
        with_attr, without_attr = get_averages(attribs, encoded,
                                               args.num_attribs)
        num_attribs = args.num_attribs

    atvects = averages_to_attribute_vectors(with_attr, without_attr,
                                            num_attribs, z_dim)
    print("Computed atvecs shape: {}".format(atvects.shape))
    if args.outfile is not None:
        save_json_attribs(atvects, args.outfile)
def atvec(parser, context, args):
    """Command entry point: build or evaluate attribute vectors.

    Registers the command-line options on ``parser``, parses ``args`` and
    then runs exactly one of these modes (first match wins):

    * ``--avg-diff A,B``: save the difference of the mean vectors of the two
      json lists, then exit.
    * ``--svm-diff A,B``: fit a linear SVM separating the two lists, rescale
      its weight vector to the length of the mean difference, save it, then
      exit.
    * ``--roc``: run ``do_roc`` for the attribute vector selected by
      ``--attribute-indices``, then exit.
    * ``--thresh``: run ``do_thresh`` on the given attribute vectors, then
      exit.
    * otherwise: compute attribute vectors from per-attribute averages
      (``--balanced2``, ``--balanced`` or plain averages), optionally through
      the SVM variant (``--svm``), and save them.

    Encoded vectors come either from ``--encoded-vectors`` (labels are then
    read from ``--dataset``, ``--attributes`` or ``--classes``) or from the
    pair ``--encoded-true`` / ``--encoded-false`` (labels are then 1 for the
    first set and 0 for the second).

    Args:
        parser: argparse-style parser that the options are added to.
        context: not used by this function.
        args: list of argument strings handed to ``parser.parse_args``.

    Returns:
        None.  Several modes terminate the process through ``sys.exit``.
    """
    parser.add_argument('--dataset', dest='dataset', default=None,
                        help="Source dataset (for labels).")
    # memo: --labels became --attributes when --classes was added
    parser.add_argument('--attributes', dest='attributes', default=None,
                        help="Text file with 0/1 labels.")
    parser.add_argument('--classes', dest='classes', default=None,
                        help="Text file with 0/1/2/.../num-classes-1 labels.")
    parser.add_argument(
        '--split', dest='split', default="train",
        help="Which split to use from the dataset (train/nontrain/valid/test/any).")
    parser.add_argument("--num-attribs", dest='num_attribs', type=int,
                        default=40, help="Number of attributes (labes)")
    parser.add_argument(
        "--which-attribs", type=str, default=None,
        help="optional comma separated list of attributes to run")
    parser.add_argument(
        "--num-classes", dest='num_classes', type=int, default=None,
        help="For multiclass, number of classes (assumed 0 .. n-1)")
    parser.add_argument("--z-dim", dest='z_dim', type=int, default=100,
                        help="z dimension of vectors")
    parser.add_argument("--encoded-vectors", type=str, default=None,
                        help="Comma separated list of json arrays")
    parser.add_argument("--encoded-true", type=str, default=None,
                        help="Comma separated list of json arrays (true)")
    parser.add_argument("--encoded-false", type=str, default=None,
                        help="Comma separated list of json arrays (false)")
    parser.add_argument(
        '--thresh', dest='thresh', default=False, action='store_true',
        help="Compute thresholds for attribute vectors classifiers")
    parser.add_argument('--svm', dest='svm', default=False,
                        action='store_true',
                        help="Use SVM for computing attribute vectors")
    parser.add_argument("--limit", dest='limit', type=int, default=None,
                        help="Limit number of inputs when computing atvecs")
    parser.add_argument('--roc', dest='roc', default=False,
                        action='store_true',
                        help="ROC curve of selected attribute vectors")
    parser.add_argument("--attribute-vectors", dest='attribute_vectors',
                        default=None,
                        help="use json file as source of attribute vectors")
    parser.add_argument(
        "--attribute-thresholds", dest='attribute_thresholds', default=None,
        help="use these non-zero values for binary classifier thresholds")
    parser.add_argument("--attribute-set", dest='attribute_set',
                        default="all",
                        help="score ROC/accuracy against true/false/all")
    parser.add_argument('--attribute-indices', dest='attribute_indices',
                        default=None, type=str,
                        help="indices to select specific attribute vectors")
    parser.add_argument(
        "--balanced2", dest='balanced2', type=str, default=None,
        help="Balanced two attributes and generate atvec. eg: 20,31")
    parser.add_argument(
        "--balanced", dest='balanced', type=str, default=None,
        help="Balance attributes and generate atvec. eg: 20,21,31")
    parser.add_argument("--avg-diff", dest='avg_diff', type=str, default=None,
                        help="Two lists of vectors to average and then diff")
    parser.add_argument(
        "--svm-diff", dest='svm_diff', type=str, default=None,
        help="Two lists of vectors to average and then svm diff")
    parser.add_argument('--outfile', dest='outfile', default=None,
                        help="Output json file for vectors.")
    args = parser.parse_args(args)

    if args.avg_diff:
        vecs1, vecs2 = args.avg_diff.split(",")
        encoded1 = json_list_to_array(vecs1)
        encoded2 = json_list_to_array(vecs2)
        print("Taking the difference between {} and {} vectors".format(
            len(encoded1), len(encoded2)))
        m1 = np.mean(encoded1, axis=0)
        m2 = np.mean(encoded2, axis=0)
        # Named diff_vector so the local does not shadow this function.
        diff_vector = m2 - m1
        z_dim, = diff_vector.shape
        atvecs = diff_vector.reshape(1, z_dim)
        print("Computed diff shape: {}".format(atvecs.shape))
        if args.outfile is not None:
            save_json_attribs(atvecs, args.outfile)
        sys.exit(0)

    if args.svm_diff:
        vecs1, vecs2 = args.svm_diff.split(",")
        encoded1 = json_list_to_array(vecs1)
        encoded2 = json_list_to_array(vecs2)
        print("Taking the svm difference between {} and {} vectors".format(
            len(encoded1), len(encoded2)))
        C = 1.0  # SVM regularization parameter
        # First list is labelled False, second list True.
        X = np.array(list(encoded1) + list(encoded2))
        y = np.array([False] * len(encoded1) + [True] * len(encoded2))
        svc = svm.LinearSVC(C=C).fit(X, y)
        # get the separating hyperplane
        w = svc.coef_[0]

        # FIXME: this is a scaling hack: the SVM weight vector is rescaled
        # to the length of the difference of the two means.
        m1 = np.mean(encoded1, axis=0)
        m2 = np.mean(encoded2, axis=0)
        mean_length = np.linalg.norm(m1 - m2)
        svn_length = np.linalg.norm(w)
        svm_vector = (mean_length / svn_length) * w
        z_dim, = svm_vector.shape
        atvecs = svm_vector.reshape(1, z_dim)
        print("Computed svm diff shape: {}".format(atvecs.shape))
        if args.outfile is not None:
            save_json_attribs(atvecs, args.outfile)
        sys.exit(0)

    print("reading encoded vectors...")
    attribs = None
    if args.encoded_vectors is not None:
        if args.encoded_vectors.endswith("json"):
            encoded = json_list_to_array(args.encoded_vectors)
            print("Read json array: {}".format(encoded.shape))
        else:
            encoded = np.load(args.encoded_vectors)['arr_0']
            print("Read numpy array: {}".format(encoded.shape))
    else:
        if args.encoded_true is None or args.encoded_false is None:
            # Fail with a message instead of an AttributeError on None.
            print("Don't know how to get vectors: try --encoded-vectors or "
                  "--encoded-true with --encoded-false")
            sys.exit(1)
        if args.encoded_true.endswith("json"):
            encoded_true = json_list_to_array(args.encoded_true)
            print("Read true json array: {}".format(encoded_true.shape))
        else:
            encoded_true = np.load(args.encoded_true)['arr_0']
            print("Read true numpy array: {}".format(encoded_true.shape))
        if args.encoded_false.endswith("json"):
            encoded_false = json_list_to_array(args.encoded_false)
            print("Read false json array: {}".format(encoded_false.shape))
        else:
            encoded_false = np.load(args.encoded_false)['arr_0']
            print("Read false numpy array: {}".format(encoded_false.shape))
        encoded = np.concatenate((encoded_true, encoded_false), axis=0)
        num_true = len(encoded_true)
        num_false = len(encoded_false)
        # Builtin int replaces np.int, an alias removed in NumPy 1.24.
        true_values = np.ones(shape=[num_true, 1, 1], dtype=int)
        false_values = np.zeros(shape=[num_false, 1, 1], dtype=int)
        attribs = np.concatenate((true_values, false_values), axis=0)

    if args.limit is not None:
        # NOTE(review): only the encoded vectors are truncated here, not the
        # labels - confirm the downstream helpers cope with that.
        encoded = encoded[:args.limit]

    # Unpacking enforces that the encoded array is 2-D.
    _num_rows, _z_dim = encoded.shape

    if attribs is None:
        print("reading attributes...")
        if args.dataset:
            attribs = np.array(
                list(
                    get_dataset_iterator(args.dataset, args.split,
                                         include_features=False,
                                         include_targets=True)))
            print("Read attributes from dataset: {}".format(attribs.shape))
        elif args.attributes is not None:
            print("Read attributes from file: {}".format(args.attributes))
            attribs = get_attribs_from_files(args.attributes)
        elif args.classes is not None:
            print("Read attributes from file: {}".format(args.classes))
            attribs = get_attribs_from_class_file(args.classes,
                                                  args.num_classes)
        else:
            print(
                "Don't know how to get labels: try --attributes or --classes")
            sys.exit(1)

    if args.which_attribs is not None:
        attribs = filter_attributes(attribs, args.which_attribs)

    print("encoded vectors: {}, attributes: {} ".format(
        encoded.shape, attribs.shape))

    if args.roc:
        atvecs = get_json_vectors(args.attribute_vectors)
        dim = len(atvecs[0])
        chosen_vector = offset_from_string(args.attribute_indices, atvecs,
                                           dim)
        if args.attribute_thresholds is not None:
            atvec_thresholds = get_json_vectors(args.attribute_thresholds)
            threshold = atvec_thresholds[0][int(args.attribute_indices)]
        else:
            threshold = None
        # isclass is deliberately hard-coded to False here (unlike the
        # --thresh path, which derives it from --num-classes).
        do_roc(chosen_vector, encoded, attribs, int(args.attribute_indices),
               threshold, args.attribute_set, args.outfile, isclass=False)
        sys.exit(0)

    if args.thresh:
        atvecs = get_json_vectors(args.attribute_vectors)
        do_thresh(atvecs, encoded, attribs, args.outfile,
                  isclass=(args.num_classes is not None))
        sys.exit(0)

    if args.balanced2:
        # A real list is required: on Python 3 a map object cannot be
        # indexed.
        indexes = [int(s) for s in args.balanced2.split(",")]
        with_attr, without_attr = get_balanced_averages2(
            attribs, encoded, indexes[0], indexes[1])
    elif args.balanced:
        indexes = [int(s) for s in args.balanced.split(",")]
        with_attr, without_attr = get_balanced_averages(
            attribs, encoded, indexes)
    # A --num-classes branch (class averages) used to sit here; the original
    # author disabled it without recording why.
    elif args.num_attribs is not None:
        with_attr, without_attr = get_averages(attribs, encoded)
    else:
        print("I think we need either num_classes or num_attribs or something")
        sys.exit(0)

    if args.svm:
        atvects = averages_to_svm_attribute_vectors(with_attr, without_attr)
    else:
        atvects = averages_to_attribute_vectors(with_attr, without_attr)
    print("Computed atvecs shape: {}".format(atvects.shape))

    if args.outfile is not None:
        save_json_attribs(atvects, args.outfile)
def atvec(parser, context, args):
    """Command entry point: build or evaluate attribute vectors.

    Registers the command-line options on ``parser``, parses ``args`` and
    then runs exactly one of these modes (first match wins):

    * ``--avg-diff A,B``: save the difference of the mean vectors of the two
      json lists, then exit.
    * ``--svm-diff A,B``: fit a linear SVM separating the two lists, rescale
      its weight vector to the length of the mean difference, save it, then
      exit.
    * ``--roc``: run ``do_roc`` for the attribute vector selected by
      ``--attribute-indices``, then exit.
    * ``--thresh``: run ``do_thresh`` on the given attribute vectors, then
      exit.
    * otherwise: compute attribute vectors from per-attribute averages
      (``--balanced2``, ``--balanced`` or plain averages), optionally through
      the SVM variant (``--svm``), and save them.

    Encoded vectors come either from ``--encoded-vectors`` (labels are then
    read from ``--dataset``, ``--attributes`` or ``--classes``) or from the
    pair ``--encoded-true`` / ``--encoded-false`` (labels are then 1 for the
    first set and 0 for the second).

    Args:
        parser: argparse-style parser that the options are added to.
        context: not used by this function.
        args: list of argument strings handed to ``parser.parse_args``.

    Returns:
        None.  Several modes terminate the process through ``sys.exit``.
    """
    parser.add_argument('--dataset', dest='dataset', default=None,
                        help="Source dataset (for labels).")
    # memo: --labels became --attributes when --classes was added
    parser.add_argument('--attributes', dest='attributes', default=None,
                        help="Text file with 0/1 labels.")
    parser.add_argument('--classes', dest='classes', default=None,
                        help="Text file with 0/1/2/.../num-classes-1 labels.")
    parser.add_argument('--split', dest='split', default="train",
                        help="Which split to use from the dataset (train/nontrain/valid/test/any).")
    parser.add_argument("--num-attribs", dest='num_attribs', type=int, default=40,
                        help="Number of attributes (labes)")
    parser.add_argument("--which-attribs", type=str, default=None,
                        help="optional comma separated list of attributes to run")
    parser.add_argument("--num-classes", dest='num_classes', type=int, default=None,
                        help="For multiclass, number of classes (assumed 0 .. n-1)")
    parser.add_argument("--z-dim", dest='z_dim', type=int, default=100,
                        help="z dimension of vectors")
    parser.add_argument("--encoded-vectors", type=str, default=None,
                        help="Comma separated list of json arrays")
    parser.add_argument("--encoded-true", type=str, default=None,
                        help="Comma separated list of json arrays (true)")
    parser.add_argument("--encoded-false", type=str, default=None,
                        help="Comma separated list of json arrays (false)")
    parser.add_argument('--thresh', dest='thresh', default=False, action='store_true',
                        help="Compute thresholds for attribute vectors classifiers")
    parser.add_argument('--svm', dest='svm', default=False, action='store_true',
                        help="Use SVM for computing attribute vectors")
    parser.add_argument("--limit", dest='limit', type=int, default=None,
                        help="Limit number of inputs when computing atvecs")
    parser.add_argument('--roc', dest='roc', default=False, action='store_true',
                        help="ROC curve of selected attribute vectors")
    parser.add_argument("--attribute-vectors", dest='attribute_vectors', default=None,
                        help="use json file as source of attribute vectors")
    parser.add_argument("--attribute-thresholds", dest='attribute_thresholds', default=None,
                        help="use these non-zero values for binary classifier thresholds")
    parser.add_argument("--attribute-set", dest='attribute_set', default="all",
                        help="score ROC/accuracy against true/false/all")
    parser.add_argument('--attribute-indices', dest='attribute_indices', default=None, type=str,
                        help="indices to select specific attribute vectors")
    parser.add_argument("--balanced2", dest='balanced2', type=str, default=None,
                        help="Balanced two attributes and generate atvec. eg: 20,31")
    parser.add_argument("--balanced", dest='balanced', type=str, default=None,
                        help="Balance attributes and generate atvec. eg: 20,21,31")
    parser.add_argument("--avg-diff", dest='avg_diff', type=str, default=None,
                        help="Two lists of vectors to average and then diff")
    parser.add_argument("--svm-diff", dest='svm_diff', type=str, default=None,
                        help="Two lists of vectors to average and then svm diff")
    parser.add_argument('--outfile', dest='outfile', default=None,
                        help="Output json file for vectors.")
    args = parser.parse_args(args)

    if args.avg_diff:
        vecs1, vecs2 = args.avg_diff.split(",")
        encoded1 = json_list_to_array(vecs1)
        encoded2 = json_list_to_array(vecs2)
        print("Taking the difference between {} and {} vectors".format(len(encoded1), len(encoded2)))
        m1 = np.mean(encoded1, axis=0)
        m2 = np.mean(encoded2, axis=0)
        # Named diff_vector so the local does not shadow this function.
        diff_vector = m2 - m1
        z_dim, = diff_vector.shape
        atvecs = diff_vector.reshape(1, z_dim)
        print("Computed diff shape: {}".format(atvecs.shape))
        if args.outfile is not None:
            save_json_attribs(atvecs, args.outfile)
        sys.exit(0)

    if args.svm_diff:
        vecs1, vecs2 = args.svm_diff.split(",")
        encoded1 = json_list_to_array(vecs1)
        encoded2 = json_list_to_array(vecs2)
        print("Taking the svm difference between {} and {} vectors".format(len(encoded1), len(encoded2)))
        C = 1.0  # SVM regularization parameter
        # First list is labelled False, second list True.
        X = np.array(list(encoded1) + list(encoded2))
        y = np.array([False] * len(encoded1) + [True] * len(encoded2))
        svc = svm.LinearSVC(C=C).fit(X, y)
        # get the separating hyperplane
        w = svc.coef_[0]

        # FIXME: this is a scaling hack: the SVM weight vector is rescaled
        # to the length of the difference of the two means.
        m1 = np.mean(encoded1, axis=0)
        m2 = np.mean(encoded2, axis=0)
        mean_length = np.linalg.norm(m1 - m2)
        svn_length = np.linalg.norm(w)
        svm_vector = (mean_length / svn_length) * w
        z_dim, = svm_vector.shape
        atvecs = svm_vector.reshape(1, z_dim)
        print("Computed svm diff shape: {}".format(atvecs.shape))
        if args.outfile is not None:
            save_json_attribs(atvecs, args.outfile)
        sys.exit(0)

    print("reading encoded vectors...")
    attribs = None
    if args.encoded_vectors is not None:
        if args.encoded_vectors.endswith("json"):
            encoded = json_list_to_array(args.encoded_vectors)
            print("Read json array: {}".format(encoded.shape))
        else:
            encoded = np.load(args.encoded_vectors)['arr_0']
            print("Read numpy array: {}".format(encoded.shape))
    else:
        if args.encoded_true is None or args.encoded_false is None:
            # Fail with a message instead of an AttributeError on None.
            print("Don't know how to get vectors: try --encoded-vectors or "
                  "--encoded-true with --encoded-false")
            sys.exit(1)
        if args.encoded_true.endswith("json"):
            encoded_true = json_list_to_array(args.encoded_true)
            print("Read true json array: {}".format(encoded_true.shape))
        else:
            encoded_true = np.load(args.encoded_true)['arr_0']
            print("Read true numpy array: {}".format(encoded_true.shape))
        if args.encoded_false.endswith("json"):
            encoded_false = json_list_to_array(args.encoded_false)
            print("Read false json array: {}".format(encoded_false.shape))
        else:
            encoded_false = np.load(args.encoded_false)['arr_0']
            print("Read false numpy array: {}".format(encoded_false.shape))
        encoded = np.concatenate((encoded_true, encoded_false), axis=0)
        num_true = len(encoded_true)
        num_false = len(encoded_false)
        # Builtin int replaces np.int, an alias removed in NumPy 1.24.
        true_values = np.ones(shape=[num_true, 1, 1], dtype=int)
        false_values = np.zeros(shape=[num_false, 1, 1], dtype=int)
        attribs = np.concatenate((true_values, false_values), axis=0)

    if args.limit is not None:
        # NOTE(review): only the encoded vectors are truncated here, not the
        # labels - confirm the downstream helpers cope with that.
        encoded = encoded[:args.limit]

    # Unpacking enforces that the encoded array is 2-D.
    _num_rows, _z_dim = encoded.shape

    if attribs is None:
        print("reading attributes...")
        if args.dataset:
            attribs = np.array(list(get_dataset_iterator(args.dataset, args.split,
                                                         include_features=False,
                                                         include_targets=True)))
            print("Read attributes from dataset: {}".format(attribs.shape))
        elif args.attributes is not None:
            print("Read attributes from file: {}".format(args.attributes))
            attribs = get_attribs_from_files(args.attributes)
        elif args.classes is not None:
            print("Read attributes from file: {}".format(args.classes))
            attribs = get_attribs_from_class_file(args.classes, args.num_classes)
        else:
            print("Don't know how to get labels: try --attributes or --classes")
            sys.exit(1)

    if args.which_attribs is not None:
        attribs = filter_attributes(attribs, args.which_attribs)

    print("encoded vectors: {}, attributes: {} ".format(encoded.shape, attribs.shape))

    if args.roc:
        atvecs = get_json_vectors(args.attribute_vectors)
        dim = len(atvecs[0])
        chosen_vector = offset_from_string(args.attribute_indices, atvecs, dim)
        if args.attribute_thresholds is not None:
            atvec_thresholds = get_json_vectors(args.attribute_thresholds)
            threshold = atvec_thresholds[0][int(args.attribute_indices)]
        else:
            threshold = None
        # isclass is deliberately hard-coded to False here (unlike the
        # --thresh path, which derives it from --num-classes).
        do_roc(chosen_vector, encoded, attribs, int(args.attribute_indices), threshold,
               args.attribute_set, args.outfile, isclass=False)
        sys.exit(0)

    if args.thresh:
        atvecs = get_json_vectors(args.attribute_vectors)
        do_thresh(atvecs, encoded, attribs, args.outfile,
                  isclass=(args.num_classes is not None))
        sys.exit(0)

    if args.balanced2:
        # A real list is required: on Python 3 a map object cannot be
        # indexed.
        indexes = [int(s) for s in args.balanced2.split(",")]
        with_attr, without_attr = get_balanced_averages2(attribs, encoded, indexes[0], indexes[1])
    elif args.balanced:
        indexes = [int(s) for s in args.balanced.split(",")]
        with_attr, without_attr = get_balanced_averages(attribs, encoded, indexes)
    # A --num-classes branch (class averages) used to sit here; the original
    # author disabled it without recording why.
    elif args.num_attribs is not None:
        with_attr, without_attr = get_averages(attribs, encoded)
    else:
        print("I think we need either num_classes or num_attribs or something")
        sys.exit(0)

    if args.svm:
        atvects = averages_to_svm_attribute_vectors(with_attr, without_attr)
    else:
        atvects = averages_to_attribute_vectors(with_attr, without_attr)
    print("Computed atvecs shape: {}".format(atvects.shape))

    if args.outfile is not None:
        save_json_attribs(atvects, args.outfile)