Example #1
def infer_receiver(unused_argv):
    hparams = create_hparams(FLAGS)
    input_vocab = load_obj('input_vocab')
    label_vocab = load_obj('label_vocab')

    with tf.Session() as sess:
        # Before training starts, check whether the user needs to restore a pre-trained model
        cnn = TextCNN(hparams=hparams,
                      mode=tf.contrib.learn.ModeKeys.TRAIN,
                      source_vocab_table=input_vocab,
                      target_vocab_table=label_vocab,
                      scope=None,
                      extra_args=None)

        #tf.global_variables_initializer().run()
        saver = tf.train.Saver()
        chpt = tf.train.latest_checkpoint(hparams.restore_checkpoint)

        if chpt:
            if tf.train.checkpoint_exists(chpt):
                saver.restore(sess, chpt)
                print("Model has been restored from %s" %
                      (hparams.restore_checkpoint))
        else:
            print("No existing model loaded from %s, exiting" %
                  (hparams.restore_checkpoint))
            return 0

        #if hparams.debug is True:
        #sess = tf_debug.LocalCLIDebugWrapperSession(sess)

        eval(sess, cnn)
Example #2
def main():
    day = 27  # input("What day do you want to simulate?")
    verbose = 0  # input("Verbose? (to see all stations 1, to just see overloaded ones 0)")
    
    data = load_obj("4day")
    start_times, end_times = initialize_start_end(data, day)
    stations = load_obj("stationsq1q2")
    stations = transform(stations)
    initialize_stations(stations)
    
    # pp = pprint.PrettyPrinter(indent=4)
    # pp.pprint(stations)
    # pp.pprint(end_times)
    curr_time = [int(i) for i in "00:00:00".split(":")]
    end_time = [int(i) for i in "24:00:00".split(":")]

    while greater_than(end_time, curr_time):
        # ipdb.set_trace(context=5)
        advance_time(curr_time, 60*5)
        start_times, end_times = update_stations(curr_time, stations, start_times, end_times)
        bad_stations = check_for_errors(stations)
        pp = pprint.PrettyPrinter(indent=4)
        # if verbose == 1 and bad_stations:
        #     pp.pprint(stations)
        # if verbose == 0 and bad_stations:
        #     pp.pprint(bad_stations)
        #     input()
    pp.pprint(bad_stations)
Example #3
def generate_email_receiver(session, model, starting_text='<eos>'):
    """Generate text from the model.
    Args:
        session: tf.Session() object
        model: Object of type RNNLM_Model
        starting_text: Initial text passed to model.
    Returns:
        output: List of word idxs
    """
    # state = model.initial_state.eval()
    # Imagine tokens as a batch size of one, length of len(tokens[0])
    input_vocab = load_obj('input_vocab')
    label_vocab = load_obj('label_vocab')
    tokens = input_vocab.encode_word_list(starting_text.split())
    tokens_length = len(tokens)

    # Pad with zeros if tokens_length is smaller than sequence_length, otherwise truncate to sequence_length
    tokens = tokens+[0]*(model.sequence_length-tokens_length) if tokens_length <= model.sequence_length \
                                                              else tokens[0:model.sequence_length]

    #Convert the tokens to np array
    tokens = np.array(tokens)
    #print np.shape(tokens)

    # axis=0 is the batch dimension; in this evaluation the batch size is 1
    tokens = np.expand_dims(tokens, axis=0)

    feed = {model.input_placeholder: tokens, model.dropout_placeholder: 1}

    y_pred = session.run(model.predictions, feed_dict=feed)
    #print np.shape(y_pred)
    #print np.shape(y_pred)
    #y_pred = np.squeeze(y_pred)

    #print ("(VAL) Evaluating the model using val dataset")
    # print prediction
    #y_pred = np.expand_dims(y_pred,axis=0)
    #print np.shape(y_pred)
    _, prediction_result = label_vector_to_index(y_pred, label_vocab)
    #print prediction_result

    # The first dimension of prediction_result is the batch dimension; batch size is 1 here, so return index 0
    prediction_result = prediction_result[0]

    #print np.shape(prediction_result)
    #print prediction_result

    return prediction_result
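A minimal call sketch for the function above, assuming an active tf.Session `sess` and a trained RNNLM_Model-style object `model` (both placeholders, not part of the original example):

# hypothetical usage of generate_email_receiver; the starting text is illustrative only
receiver_ids = generate_email_receiver(sess, model, starting_text='meeting notes attached')
print(receiver_ids)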
Example #4
    def RMSE(p, t, verbose=False, subject='', gp=False):

        # Base Results:
        base_results = {
            'start_n': [12, 11, 12, 9],
            'start_mean': [20.75, 12.91, 5.58, 27.33],
            'start_std': [22.59, 9.87, 8.83, 30.49],
            'switch_n': [16, 10, 14, 19, 13, 14, 14, 8],
            'switch_mean': [11.13, 16, 15.29, 11.47, 11.69, 38.71, 6.86, 23.25],
            'switch_std': [8.39, 13.13, 22.06, 13.38, 11.41, 28.18, 6.55, 26.44]
        }

        if subject:
            base_r = util.load_obj('base_results')
            base_results = {
                'st_trial': base_r[subject][0],
                'st_try': base_r[subject][1],
                'sw_trial': base_r[subject][2],
                'sw_try': base_r[subject][3],
            }

        prediction = np.array(p)
        target = np.array(base_results[t])
        if gp:
            target = np.array(t)
        if verbose:
            print(f"Model Results: \n{prediction} \nBase Results: \n{target}\n")

        return np.sqrt(np.mean((prediction - target) ** 2))
Example #5
def load_index():
    if not util.is_saved(INVERTED_INDEX_FILE_NAME) and not util.is_saved(
            DOC_ID_NAME_INDEX_NAME) and not util.is_saved(
                TFIDF_NAME_INDEX_NAME):
        build_index()
    else:
        print('Found cached indexes! Using them ;)')
    _inverted_index: dict[str, indexer.Posting] = util.load_obj(
        INVERTED_INDEX_FILE_NAME)
    _doc_id_name_index: dict[int, str] = util.load_obj(DOC_ID_NAME_INDEX_NAME)
    _tfidf = util.load_pickle_as_pandas_df(TFIDF_NAME_INDEX_NAME)
    return {
        'tfidf': _tfidf,
        'inverted': _inverted_index,
        'did_name': _doc_id_name_index
    }
Example #6
def GetGeoTree(all=False):
    # print os.path.exists('geotree')
    if os.path.exists('geotree'):
        tree = util.load_obj('geotree')
        return tree

    geo_hash = pandas.read_csv(
        'tianchi_mobile_recommend_train_user.csv.subset.csv')
    geo_hash = geo_hash.dropna()
    geo_count = dict()

    rule = [(0, 0), (1, 1e5), (2, 1e5), (3, 1e5), (4, 1e4), (5, 1e3), (6, 1e3)]
    for r in rule:
        if r[0] == 0:
            split_list = ['9', 'm', 'f']
            for i in geo_hash['user_geohash']:
                util.IncDict(geo_count, i[:1])
        else:
            split_list = [
                i for i in geo_count.keys()
                if geo_count[i] > r[1] and len(i) == r[0]
            ]
            for i in geo_hash['user_geohash']:
                if i[:r[0]] in split_list:
                    util.IncDict(geo_count, i[:r[0] + 1])

    util.save_obj(geo_count, 'geotree')
    if all:
        return geo_count
    else:
        geo_tree = {
            i: geo_count[i]
            for i in geo_count.keys() if geo_count[i] > 1e5 or len(i) == 1
        }
    return geo_tree
Example #7
def unify_features():
    train_labels = pd.read_csv(
        '~/Documents/thesis/dataset/dataSample/trainLabels.csv')

    section_features = load_obj('section_features')
    xref_features = load_obj('xref_features')
    opcode_1gram_features = load_obj('1gram_opcode_tfidf')
    byte_1gram_features = load_obj('1gram_byte_tfidf')

    # concat features with classes and IDs to create the dataset
    data = pd.concat([train_labels, xref_features, section_features,
                      opcode_1gram_features, byte_1gram_features],
                     axis=1, sort=False)
    print(data.shape)
    save_obj(data, 'interim_data')

    return data
Example #8
    def initializeGL(self):
        gl.glClearColor(1.0, 1.0, 1.0, 0.0)
        # gl.glColor3f(0.0,0.0, 0.0)
        # gl.glPointSize(4.0)
        # gl.glMatrixMode(gl.GL_PROJECTION)
        # gl.glLoadIdentity()
        # glu.gluOrtho2D(0.0,640.0,0.0,480.0)
        gl.glViewport(0, 0, 800, 600)
        gl.glClearColor(0.0, 0.5, 0.5, 1.0)
        gl.glEnableClientState(gl.GL_VERTEX_ARRAY)

        self._vertices, self._normals, self._indices = \
            util.load_obj_with_index("monkey.obj")
        a, b = util.load_obj("monkey.obj")
        self._vbo = gl.glGenBuffers(1)
        gl.glBindBuffer(gl.GL_ARRAY_BUFFER, self._vbo)
        gl.glBufferData(gl.GL_ARRAY_BUFFER, self._vertices.size * 4,
                        self._vertices, gl.GL_STATIC_DRAW)

        self._normal_vbo = gl.glGenBuffers(1)
        gl.glBindBuffer(gl.GL_ARRAY_BUFFER, self._normal_vbo)
        gl.glBufferData(gl.GL_ARRAY_BUFFER, self._normals.size * 4,
                        self._normals, gl.GL_STATIC_DRAW)

        self._index_buffer = gl.glGenBuffers(1)
        gl.glBindBuffer(gl.GL_ELEMENT_ARRAY_BUFFER, self._index_buffer)
        gl.glBufferData(gl.GL_ELEMENT_ARRAY_BUFFER,
                        len(self._indices) * 4,
                        (ctypes.c_uint * len(self._indices))(*self._indices),
                        gl.GL_STATIC_DRAW)

        self._shader_program = shader.LoadShaders("shader9.vs", "shader9.ps")
        gl.glEnable(gl.GL_DEPTH_TEST)
        gl.glDepthFunc(gl.GL_LESS)
Example #9
    def initializeGL(self):
        gl.glClearColor(1.0, 1.0, 1.0, 0.0)
        # gl.glColor3f(0.0,0.0, 0.0)
        # gl.glPointSize(4.0)
        # gl.glMatrixMode(gl.GL_PROJECTION)
        # gl.glLoadIdentity()
        # glu.gluOrtho2D(0.0,640.0,0.0,480.0)
        gl.glViewport(0, 0, 800, 600)
        gl.glClearColor(0.0, 0.5, 0.5, 1.0)
        gl.glEnableClientState(gl.GL_VERTEX_ARRAY)

        self._vertices, self._normals = util.load_obj("monkey.obj")
        self._vbo = gl.glGenBuffers(1)
        gl.glBindBuffer(gl.GL_ARRAY_BUFFER, self._vbo)
        gl.glBufferData(gl.GL_ARRAY_BUFFER, self._vertices.size * 4,
                        self._vertices, gl.GL_STATIC_DRAW)

        self._normal_vbo = gl.glGenBuffers(1)
        gl.glBindBuffer(gl.GL_ARRAY_BUFFER, self._normal_vbo)
        gl.glBufferData(gl.GL_ARRAY_BUFFER, self._normals.size * 4,
                        self._normals, gl.GL_STATIC_DRAW)

        self._shader_program = shader.LoadShaders("shader8.vs",
                                                  "shader8.ps")
        gl.glEnable(gl.GL_DEPTH_TEST)
        gl.glDepthFunc(gl.GL_LESS)
Example #10
def GetGeoTree(all=False):
    # print os.path.exists('geotree')
    if os.path.exists('geotree'):
        tree = util.load_obj('geotree')
        return tree

    geo_hash = pandas.read_csv('tianchi_mobile_recommend_train_user.csv.subset.csv')
    geo_hash = geo_hash.dropna()
    geo_count = dict()

    rule = [(0, 0), (1, 1e5), (2, 1e5), (3, 1e5), (4, 1e4), (5, 1e3), (6, 1e3)]
    for r in rule:
        if r[0] == 0:
            split_list = ['9', 'm', 'f']
            for i in geo_hash['user_geohash']:
                util.IncDict(geo_count, i[:1])
        else:
            split_list = [i for i in geo_count.keys() if geo_count[i] > r[1] and len(i) == r[0]]
            for i in geo_hash['user_geohash']:
                if i[:r[0]] in split_list:
                    util.IncDict(geo_count, i[:r[0] + 1])

    util.save_obj(geo_count, 'geotree')
    if all:
        return geo_count
    else:
        geo_tree = {i: geo_count[i] for i in geo_count.keys() if geo_count[i] > 1e5 or len(i) == 1}
    return geo_tree
Example #11
 def __init__(self, obj_file, name, _id, text):
     self.obj_file = obj_file
     self.name = name
     self._id = _id
     self.text = text
     if obj_file is not None:
         self.v, self.f = util.load_obj(obj_file)
Example #12
 def get_trees(self):
     if self.trees == []:
         trees = util.load_obj(self.filename)
         if trees is None:
             trees = self._generate_trees()
             util.save_obj(trees, self.filename)
         self.trees = trees
     return self.trees
Example #13
def quantization_predict(*args):
    imu_measurements = args[0]
    if len(args) == 1:
        # load model
        kmeans = load_obj('kmeans_model.pkl')
    else:
        kmeans = args[1]

    return kmeans.predict(imu_measurements)
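A brief, hypothetical usage sketch for quantization_predict above; `imu_window` is a placeholder array of IMU measurements:

# with only measurements, the cached 'kmeans_model.pkl' is loaded internally
labels = quantization_predict(imu_window)
# with an already-loaded model, the second argument is used instead
labels = quantization_predict(imu_window, kmeans)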
Example #14
 def get_trees(self):
     if self.trees == []:
         # attempt to load cache
         trees = util.load_obj(self.filename)
         if trees is None or trees == []:  # not cached yet
             trees = self._generate_trees()
             util.save_obj(trees, self.filename) # save cache
         self.trees = trees
     return self.trees
Example #15
 def get_trees(self):
     if self.trees == []:
         # attempt to load cache
         trees = util.load_obj(self.filename)
         if trees is None or trees == []:  # not cached yet
             trees = self._generate_trees()
             util.save_obj(trees, self.filename)  # save cache
         self.trees = trees
     return self.trees
Example #16
def mapview():
    # creating a map in the view
    fn = "Divvy_Trips_2017_Q3Q4/Divvy_Stations_2017_Q3Q4.csv"
    otherfn = "Divvy_Trips_2017_Q3Q4/Divvy_Trips_2017_Q3.csv"
    data = readdict(fn)
    # other_data = readdict(otherfn)
    # other_data = data_cleanup_missing(other_data)
    # frequency_dictionary, most_common = get_frequency_dictionaries(other_data, other_data[0].keys())
    # print(frequency_dictionary['gender'])

    frequency_dictionary = load_obj('frequency_dictionary')
    # frequency_dictionary, most_common = standard_procedures(otherfn)

    lat = get_attribute(data, "latitude", float)
    lon = get_attribute(data, "longitude", float)
    names = get_attribute(data, "name")
    stations = []
    stations2 = []
    for idx, l in enumerate(lat):
        stations.append((
            lat[idx], lon[idx], names[idx],
            "https://maps.gstatic.com/intl/en_us/mapfiles/markers2/measle_blue.png"
        ))
        stations2.append({
            "lat": lat[idx],
            "lng": lon[idx],
            "name": names[idx],
            "img": "https://maps.gstatic.com/intl/en_us/mapfiles/markers2/measle_blue.png"
        })

    mymap = Map(scale=2,
                identifier="view-side",
                lat=41.8781,
                lng=-87.6298,
                markers=stations,
                fit_markers_to_bounds=True,
                style="height:500px;width:100%;")
    # bikeLayer = GoogleMaps.BicyclingLayer
    # BicyclingLayer.setMap(mymap)
    stations2 = json.dumps(stations2)
    # stations2 = stations2.replace("\"","")
    # print(stations2)

    # stations2 = json.loads(stations2)
    # print(stations2)

    # print(stations)

    return render_template('example.html',
                           mymap=mymap,
                           json_stations=stations2,
                           frequencies=json.dumps(frequency_dictionary))
Example #17
def model_test():

    hparams = create_or_load_hparams(hparams_file=HFILE, default_hparams=None)
    config = tf.ConfigProto(log_device_placement=hparams.log_device_placement,
                            allow_soft_placement=hparams.allow_soft_placement)
    input_vocab = load_obj(ROOT_PATH, 'general_vocab')
    label_vocab = load_obj(ROOT_PATH, 'mpss_pl_vocab')

    with tf.Session(config=config) as sess:
        cnn = TextCNN(hparams=hparams,
                      mode=tf.contrib.learn.ModeKeys.TRAIN,
                      source_vocab_table=input_vocab,
                      target_vocab_table=label_vocab,
                      scope=None,
                      extra_args=None)

        saver = tf.train.Saver(tf.global_variables(),
                               max_to_keep=hparams.num_checkpoints)
        chpt = tf.train.latest_checkpoint(hparams.restore_checkpoint)
        if chpt:
            if tf.train.checkpoint_exists(chpt):
                saver.restore(sess, chpt)
                print("Model has been resotre from %s" %
                      (hparams.restore_checkpoint))
        else:
            print("No pre-trained model loaded, abort!!!")
            return

        sess.run(tf.local_variables_initializer())

        predict_result = cnn.predict(sess=sess,
                                     input_txt=TEST_EMAIL,
                                     input_vocab=input_vocab,
                                     label_vocab=label_vocab)

        print("Predicted result is %s" % predict_result)
Example #18
def to_one_csv():
    DIRPATH = ['./npy_data/tihm15/UTI_mike/', './npy_data/tihmdri/UTI_test/']
    SAVE_PATH = './csv_data/one_csv/data.csv'
    data = {'Patient_id': [], 'Date': [], 'Symptoms': [], 'Validation': []}
    for path in DIRPATH:
        filenames = _iter_directory(path)
        for f in filenames:
            validations = load_obj(path + f)
            for valid in validations[1]:
                data['Patient_id'].append(f.split('.')[0])
                data['Date'].append(valid[0])
                data['Symptoms'].append(valid[1])
                data['Validation'].append(valid[2])
    df = pd.DataFrame(data)
    df.to_csv(SAVE_PATH)
Example #19
    def initializeGL(self):
        gl.glClearColor(1.0, 1.0, 1.0, 0.0)
        # gl.glColor3f(0.0,0.0, 0.0)
        # gl.glPointSize(4.0)
        # gl.glMatrixMode(gl.GL_PROJECTION)
        # gl.glLoadIdentity()
        # glu.gluOrtho2D(0.0,640.0,0.0,480.0)
        gl.glViewport(0, 0, 800, 600)
        gl.glClearColor(0.0, 0.5, 0.5, 1.0)
        gl.glEnableClientState(gl.GL_VERTEX_ARRAY)

        self._vertices, self._normals = util.load_obj("monkey.obj")
        self._vbo = gl.glGenBuffers(1)
        # ar = array("f", vertices)
        gl.glBindBuffer(gl.GL_ARRAY_BUFFER, self._vbo)
        gl.glBufferData(gl.GL_ARRAY_BUFFER, self._vertices.size * 4,
                        self._vertices, gl.GL_STATIC_DRAW)

        self._shader_program = shader.LoadShaders("shader7.vs", "shader7.ps")
        gl.glEnable(gl.GL_DEPTH_TEST)
        gl.glDepthFunc(gl.GL_LESS)
Example #20
def to_separate_csv():
    DIRPATH = './npy_data/tihm15/Agitation_mike/'
    SAVE_PATH = './csv_data/analysation/mike/agitation/'
    INDEX = [
        'Fridge', 'living room', 'Bathroom', 'Hallway', 'Bedroom', 'Kitchen',
        'Microwave', 'Kettle'
    ]

    filenames = _iter_directory(DIRPATH)
    data = []
    info = []
    for f in filenames:
        a = load_obj(DIRPATH + f)
        data.append(a[0])
        info.append(a[1])

    for i, patient in enumerate(info):
        for j, day in enumerate(patient):
            filename = filenames[i].split('.')[0] + '_' + day[0] + '_' + str(
                day[2]) + '.csv'
            p_data = data[i][j]
            df = pd.DataFrame(p_data, index=INDEX)
            df.to_csv(SAVE_PATH + filename)
Example #21
def preprocess(dataset, explainer):

    # Dataset preparation
    data = class_name = None
    dtypes = {}
    encoded_data = None

    if (dataset == 'texas'):
        class_name = 'PRINC_SURG_PROC_CODE'
        dtypes = load_obj('data/' + dataset + '/dtypes')
        data = pd.read_csv('data/' + dataset + '/' + dataset + '_mapped.csv',
                           dtype=dtypes)

        columns2remove = ['RECORD_ID', 'PRINC_ICD9_CODE']
        data.drop(columns2remove, inplace=True, axis=1)

        print("Splitting ...")
        bb_train, bb_val, sh_train, sh_val, r2E, test = split(data, class_name)

        bb_train.to_csv('data/' + dataset +
                        '/baseline_split/bb_train_mapped.csv',
                        index=False)
        print("bb_train saved")
        bb_val.to_csv('data/' + dataset + '/baseline_split/bb_val_mapped.csv',
                      index=False)
        print("bb_val saved")
        sh_train.to_csv('data/' + dataset +
                        '/baseline_split/sh_train_mapped.csv',
                        index=False)
        print("sh_train saved")
        sh_val.to_csv('data/' + dataset + '/baseline_split/sh_val_mapped.csv',
                      index=False)
        print("sh_val saved")
        r2E.to_csv('data/' + dataset + '/baseline_split/r2E_mapped.csv',
                   index=False)
        print("r2E saved")
        test.to_csv('data/' + dataset + '/baseline_split/test_mapped.csv',
                    index=False)

    else:
        data, class_name = prepare_dataset(dataset, explainer)
        # Mapping
        mapped_data = map_columns(data, class_name)
        mapped_data.to_csv('data/' + dataset + '/' + dataset + '_mapped.csv')

    # Encoding
    if (dataset == 'adult'):
        class_name = 'class'
        for col in data.columns:
            if (col in ['capital-gain', 'capital-loss']):
                dtypes[col] = 'float32'
            elif (col in ['age', 'hours-per-week']):
                dtypes[col] = 'int64'
            else:
                dtypes[col] = 'object'

    if (dataset == 'mobility'):
        class_name = 'class'
        for col in data.columns:
            if (col in [
                    'max_distance_from_home', 'maximum_distance', 'max_tot',
                    'distance_straight_line', 'sld_avg', 'radius_of_gyration',
                    'norm_uncorrelated_entropy', 'nlr', 'home_freq_avg',
                    'work_freq_avg', 'hf_tot_df', 'wf_tot_df',
                    'n_user_home_avg', 'n_user_work_avg', 'home_entropy',
                    'work_entropy'
            ]):
                dtypes[col] = 'float32'
            elif (col in [
                    'uid', 'wait', 'number_of_visits', 'nv_avg',
                    'number_of_locations', 'raw_home_freq', 'raw_work_freq',
                    'raw_least_freq', 'n_user_home', 'n_user_work'
            ]):
                dtypes[col] = 'int64'
            else:
                dtypes[col] = 'object'

    encoded_data = encode_Dask_dataset(dd.from_pandas(data, npartitions=1),
                                       class_name, dtypes, [])
    #encoded_data, feature_names, class_values, numeric_columns, rdf, real_feature_names, features_map = encode_dataset(data, class_name)
    encoded_data.to_csv('data/' + dataset + '/' + dataset + '_encoded.csv')

    # Splitting both datasets
    bb_train, bb_val, sh_train, sh_val, r2E, test = split(data, class_name)
    bb_train_m, bb_val_m, sh_train_m, sh_val_m, r2E_m, test_m = split(
        mapped_data, class_name)
    bb_train_e, bb_val_e, sh_train_e, sh_val_e, r2E_e, test_e = split(
        encoded_data, class_name)

    # Writing datasets
    if (len(bb_train) + len(bb_val) + len(sh_train) + len(sh_val) + len(r2E) +
            len(test) == len(data)
            and len(bb_train_e) + len(bb_val_e) + len(sh_train_e) +
            len(sh_val_e) + len(r2E_e) + len(test_e) == len(encoded_data)):
        print('Dataset: ' + dataset)
        bb_train.to_csv('data/' + dataset + '/baseline_split/bb_train.csv',
                        index=False)
        bb_train_m.to_csv('data/' + dataset +
                          '/baseline_split/bb_train_mapped.csv',
                          index=False)
        bb_train_e.to_csv('data/' + dataset + '/baseline_split/bb_train_e.csv',
                          index=False)
        print("bb_train saved")
        bb_val.to_csv('data/' + dataset + '/baseline_split/bb_val.csv',
                      index=False)
        bb_val_m.to_csv('data/' + dataset +
                        '/baseline_split/bb_val_mapped.csv',
                        index=False)
        bb_val_e.to_csv('data/' + dataset + '/baseline_split/bb_val_e.csv',
                        index=False)
        print("bb_val saved")
        sh_train.to_csv('data/' + dataset + '/baseline_split/sh_train.csv',
                        index=False)
        sh_train_m.to_csv('data/' + dataset +
                          '/baseline_split/sh_train_mapped.csv',
                          index=False)
        sh_train_e.to_csv('data/' + dataset + '/baseline_split/sh_train_e.csv',
                          index=False)
        print("sh_train saved")
        sh_val.to_csv('data/' + dataset + '/baseline_split/sh_val.csv',
                      index=False)
        sh_val_m.to_csv('data/' + dataset +
                        '/baseline_split/sh_val_mapped.csv',
                        index=False)
        sh_val_e.to_csv('data/' + dataset + '/baseline_split/sh_val_e.csv',
                        index=False)
        print("sh_val saved")
        r2E.to_csv('data/' + dataset + '/baseline_split/r2E.csv', index=False)
        r2E_m.to_csv('data/' + dataset + '/baseline_split/r2E_mapped.csv',
                     index=False)
        r2E_e.to_csv('data/' + dataset + '/baseline_split/r2E_e.csv',
                     index=False)
        print("r2E saved")
        test.to_csv('data/' + dataset + '/baseline_split/test.csv',
                    index=False)
        test_m.to_csv('data/' + dataset + '/baseline_split/test_mapped.csv',
                      index=False)
        test_e.to_csv('data/' + dataset + '/baseline_split/test_e.csv',
                      index=False)
        print("test saved")
    else:
        print("Error in splitted datasets sizes")
    with torch.no_grad():
        for i, (im, _) in enumerate(target_dataset):
            frame_tensor = torch.zeros(len(target_dataset))
            frame_tensor[i] = 1  # one-hot selector for the current frame
            frame_tensor = frame_tensor.cuda()

            deltas = torch.matmul(M3, torch.matmul(M2, torch.matmul(M1, frame_tensor))).flatten()
            deformed_vtxs = (vtx_pos.flatten() + deltas).reshape((vtx_pos.shape[0], 3))
            deformed_vtxs = torch.clamp(deformed_vtxs, -1.0, 1.0)

            #write_obj(f"frame_{i}.obj", deformed_vtxs.detach().cpu().tolist(), pos_idx.detach().cpu().tolist())
            util.write_obj(f"frame_{i}.obj", deformed_vtxs.detach().cpu().tolist(), pos_idx.detach().cpu().tolist(), vtx_col.detach().cpu().tolist())

    np.savez('vtx_col.npz', vtx_col=vtx_col.cpu().detach().numpy())


if __name__ == '__main__':
    # mesh = util.load_obj('sphere.obj')
    mesh = util.load_obj('prediction.obj')
    vtx_pos = mesh['vtx_pos']
    # make all positive
    vtx_pos += vtx_pos.min()
    vtx_pos -= vtx_pos.min()
    vtx_pos /= vtx_pos.max()
    vtx_pos -= 0.5
    mesh['vtx_pos'] = vtx_pos

    for k,v in mesh.items():
        assert(v.shape[1] == 3)

    fit_mesh_col(mesh, 'images/bottle')
    uv, uv_idx = init_uv()
    uv_idx = uv_idx[:pos_idx.shape[0]]
    pos_idx = torch.from_numpy(pos_idx.astype(np.int32)).cuda()
    vtx_pos = torch.from_numpy(pos.astype(np.float32)).cuda()
    uv_idx = torch.from_numpy(uv_idx.astype(np.int32)).cuda()
    vtx_uv = torch.from_numpy(uv.astype(np.float32)).cuda()
    tex = torch.from_numpy(tex.astype(np.float32)).cuda()

    # Render reference and optimized frames. Always enable mipmapping for reference.
    color = render(glctx, r_mvp, vtx_pos, pos_idx, vtx_uv, uv_idx, tex, 1024,
                   False, 0)
    Image.fromarray((color[0].detach().cpu().numpy() * 255).astype(
        np.uint8)).save('test.png')


if __name__ == '__main__':
    mesh = util.load_obj('sphere.obj')
    #mesh = util.load_obj('prediction.obj')
    vtx_pos = mesh['vtx_pos']
    # make all positive
    vtx_pos += vtx_pos.min()
    vtx_pos -= vtx_pos.min()
    vtx_pos /= vtx_pos.max()
    vtx_pos -= 0.5
    mesh['vtx_pos'] = vtx_pos

    for k, v in mesh.items():
        assert (v.shape[1] == 3)

    fit_mesh(mesh, 'images/bottle')
import os
import sys

utils_dir = os.path.dirname(
    os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
utils_dir = os.path.join(utils_dir, 'utils')
sys.path.append(utils_dir)

import util

parent_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))

wwcd = os.path.join(parent_path, 'data', 'intermediate', 'write_with_class')
write_class = util.load_obj(wwcd)

writers = []  # each entry is a (writer, [list of (file, class)]) tuple
cimages = []
(cw, _, _) = write_class[0]
for (w, f, c) in write_class:
    if w != cw:
        writers.append((cw, cimages))
        cw = w
        cimages = [(f, c)]
    cimages.append((f, c))
writers.append((cw, cimages))

ibwd = os.path.join(parent_path, 'data', 'intermediate', 'images_by_writer')
util.save_obj(writers, ibwd)
Example #25
def load_treebank(filename):
    return util.load_obj(filename)
Example #26
def load_treebank(filename):
    return util.load_obj(filename)
Example #27
def run(args, num_workers=1, log_interval=100, verbose=True, save_path=None):
    code_root = os.path.dirname(os.path.realpath(__file__))
    if not os.path.isdir('{}/{}_result_files/'.format(code_root, args.task)):
        os.mkdir('{}/{}_result_files/'.format(code_root, args.task))
    path = '{}/{}_result_files/'.format(
        code_root, args.task) + utils.get_path_from_args(args)
    print('File saved in {}'.format(path))

    if os.path.exists(path + '.pkl') and not args.rerun:
        print('File already exists. Use --rerun to recompute.')
        return utils.load_obj(path)

    start_time = time.time()
    utils.set_seed(args.seed)

    # ---------------------------------------------------------
    # -------------------- training ---------------------------

    # initialise model
    model = user_preference_estimator(args).cuda()

    model.train()
    print(sum([param.nelement() for param in model.parameters()]))
    # set up meta-optimiser for model parameters
    meta_optimiser = torch.optim.Adam(model.parameters(), args.lr_meta)
    # scheduler = torch.optim.lr_scheduler.StepLR(meta_optimiser, 5000, args.lr_meta_decay)

    # initialise logger
    logger = Logger()
    logger.args = args
    # initialise the starting point for the meta gradient (it's faster to copy this than to create new object)
    meta_grad_init = [0 for _ in range(len(model.state_dict()))]
    dataloader_train = DataLoader(Metamovie(args),
                                  batch_size=1,
                                  num_workers=args.num_workers)
    for epoch in range(args.num_epoch):

        x_spt, y_spt, x_qry, y_qry = [], [], [], []
        iter_counter = 0
        for step, batch in enumerate(dataloader_train):
            if len(x_spt) < args.tasks_per_metaupdate:
                x_spt.append(batch[0][0].cuda())
                y_spt.append(batch[1][0].cuda())
                x_qry.append(batch[2][0].cuda())
                y_qry.append(batch[3][0].cuda())
                if not len(x_spt) == args.tasks_per_metaupdate:
                    continue

            if len(x_spt) != args.tasks_per_metaupdate:
                continue

            # initialise meta-gradient
            meta_grad = copy.deepcopy(meta_grad_init)
            loss_pre = []
            loss_after = []
            for i in range(args.tasks_per_metaupdate):
                loss_pre.append(F.mse_loss(model(x_qry[i]), y_qry[i]).item())
                fast_parameters = model.final_part.parameters()
                for weight in model.final_part.parameters():
                    weight.fast = None
                for k in range(args.num_grad_steps_inner):
                    logits = model(x_spt[i])
                    loss = F.mse_loss(logits, y_spt[i])
                    grad = torch.autograd.grad(loss,
                                               fast_parameters,
                                               create_graph=True)
                    fast_parameters = []
                    for k, weight in enumerate(model.final_part.parameters()):
                        if weight.fast is None:
                            weight.fast = weight - args.lr_inner * grad[
                                k]  #create weight.fast
                        else:
                            weight.fast = weight.fast - args.lr_inner * grad[k]
                        fast_parameters.append(weight.fast)

                logits_q = model(x_qry[i])
                # loss_q will be overwritten and just keep the loss_q on last update step.
                loss_q = F.mse_loss(logits_q, y_qry[i])
                loss_after.append(loss_q.item())
                task_grad_test = torch.autograd.grad(loss_q,
                                                     model.parameters())

                for g in range(len(task_grad_test)):
                    meta_grad[g] += task_grad_test[g].detach()

            # -------------- meta update --------------

            meta_optimiser.zero_grad()

            # set gradients of parameters manually
            for c, param in enumerate(model.parameters()):
                param.grad = meta_grad[c] / float(args.tasks_per_metaupdate)
                param.grad.data.clamp_(-10, 10)

            # the meta-optimiser only operates on the shared parameters, not the context parameters
            meta_optimiser.step()
            #scheduler.step()
            x_spt, y_spt, x_qry, y_qry = [], [], [], []

            loss_pre = np.array(loss_pre)
            loss_after = np.array(loss_after)
            logger.train_loss.append(np.mean(loss_pre))
            logger.valid_loss.append(np.mean(loss_after))
            logger.train_conf.append(1.96 * np.std(loss_pre, ddof=0) /
                                     np.sqrt(len(loss_pre)))
            logger.valid_conf.append(1.96 * np.std(loss_after, ddof=0) /
                                     np.sqrt(len(loss_after)))
            logger.test_loss.append(0)
            logger.test_conf.append(0)

            utils.save_obj(logger, path)
            # print current results
            logger.print_info(epoch, iter_counter, start_time)
            start_time = time.time()

            iter_counter += 1
        if epoch % (2) == 0:
            print('saving model at iter', epoch)
            logger.valid_model.append(copy.deepcopy(model))

    return logger, model
INDEX_TYPE = 4
INDEX_DIR = "./Indices/INDEX{0}/".format(INDEX_TYPE)

REMOVE_STOP_WORDS_QUERY = False
STEM_QUERY = False
if INDEX_TYPE == 2:
    REMOVE_STOP_WORDS_QUERY = True
elif INDEX_TYPE == 3:
    STEM_QUERY = True
elif INDEX_TYPE >= 4:
    REMOVE_STOP_WORDS_QUERY = True
    STEM_QUERY = True

INDEX = INDEX_DIR + "INDEX.dat"
CATALOG = util.load_obj(INDEX_DIR + "CATALOG.pkl")
DOC_LEN_MAP = util.load_obj(INDEX_DIR + "DOC_LEN_MAP.pkl")
META_DATA = util.load_obj(INDEX_DIR + "META.pkl")
DOC_ID_MAP = util.load_obj('DOC_ID_MAP.pkl')
ID_DOC_MAP = {v : k for k, v in DOC_ID_MAP.items()}

AVG_DOC_LENGTH = META_DATA['average_doc_len']
TOTAL_DOCS = META_DATA['total_docs']
TOTAL_TOKENS = META_DATA['total_tokens']
VOCAB_LEN = META_DATA['vocab_len']

QUERY_FILE = "query_desc.51-100.short.txt"

NO_OF_TOP_RESULTS = 1000

MODELS = ['tfidf', 'bm25', 'laplace', 'rsv', 'prox_rsv']
Example #29
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import util

co_li = util.load_obj('cookies.pkl')
cookies = util.get_cookie_dir(co_li)
print(cookies)
Example #30
def GenFeature(finput='user_action_train.csv',
               fitem='tianchi_mobile_recommend_train_item.csv',
               foutput='feature.csv',
               lastday='2014-12-18'):
    geotree = util.load_obj('geotree')

    user_itemgeo_count_before_lastday = dict()
    user_itemgeo_car_before_lastday = dict()
    user_itemgeo_star_before_lastday = dict()
    user_itemgeo_buy_before_lastday = dict()
    user_itemgeo_count_lastday = dict()
    user_itemgeo_car_lastday = dict()
    user_itemgeo_star_lastday = dict()
    user_itemgeo_buy_lastday = dict()

    item_itemgeo = dict()

    user_count_lastday = dict()
    user_star_lastday = dict()
    user_car_lastday = dict()
    user_buy_lastday = dict()
    user_count_before_lastday = dict()
    user_star_before_lastday = dict()
    user_car_before_lastday = dict()
    user_buy_before_lastday = dict()

    user_geocount = dict()

    user_items = set()
    itemfile = open(fitem, 'r', newline='')
    itemreader = csv.reader(itemfile, delimiter=',')

    for row in itemreader:
        tid = row[0]
        if len(row[1]) > 1:
            getItemGeoDict(item_itemgeo, com.GeoMatch(row[1], geotree), tid)
        else:
            item_itemgeo[tid] = []
            item_itemgeo[tid].append(-1)
        #if len(row[1])>1:
        #item_itemgeo[tid] = com.GeoMatch(row[1],geotree)
        #else:
        #item_itemgeo[tid] = -1

    with open(finput, 'r', newline='') as f:
        reader = csv.reader(f, delimiter=',')
        header = next(reader)
        print(header)

        i = 0

        for row in reader:
            uid = row[0]
            tid = row[1]
            cid = row[4]

            user_items.add('%s_%s' % (uid, tid))

            if tid not in item_itemgeo:
                continue

            if item_itemgeo[tid][0] != -1:

                if row[5][:10] == lastday:
                    if row[2] == '1':
                        for itemgeohash in item_itemgeo[tid]:
                            IncDict(user_itemgeo_count_lastday,
                                    '%s_%s' % (uid, itemgeohash),
                                    len(item_itemgeo[tid]))
                    elif row[2] == '2':
                        for itemgeohash in item_itemgeo[tid]:
                            IncDict(user_itemgeo_star_lastday,
                                    '%s_%s' % (uid, itemgeohash),
                                    len(item_itemgeo[tid]))

                    elif row[2] == '3':
                        for itemgeohash in item_itemgeo[tid]:
                            IncDict(user_itemgeo_car_lastday,
                                    '%s_%s' % (uid, itemgeohash),
                                    len(item_itemgeo[tid]))
                    elif row[2] == '4':
                        for itemgeohash in item_itemgeo[tid]:
                            IncDict(user_itemgeo_buy_lastday,
                                    '%s_%s' % (uid, itemgeohash),
                                    len(item_itemgeo[tid]))
                else:
                    if row[2] == '1':
                        for itemgeohash in item_itemgeo[tid]:
                            IncDict(user_itemgeo_count_before_lastday,
                                    '%s_%s' % (uid, itemgeohash),
                                    len(item_itemgeo[tid]))
                    elif row[2] == '2':
                        for itemgeohash in item_itemgeo[tid]:
                            IncDict(user_itemgeo_star_before_lastday,
                                    '%s_%s' % (uid, itemgeohash),
                                    len(item_itemgeo[tid]))

                    elif row[2] == '3':
                        for itemgeohash in item_itemgeo[tid]:
                            IncDict(user_itemgeo_car_before_lastday,
                                    '%s_%s' % (uid, itemgeohash),
                                    len(item_itemgeo[tid]))
                    elif row[2] == '4':
                        for itemgeohash in item_itemgeo[tid]:
                            IncDict(user_itemgeo_buy_before_lastday,
                                    '%s_%s' % (uid, itemgeohash),
                                    len(item_itemgeo[tid]))

            i = i + 1
            if i % 100000 == 0:
                print('processed %d scores!' % i)

    fd = open(foutput, 'w', newline='')
    fw = csv.writer(fd, delimiter=',')

    fw.writerow([
        'user_itemgeo_count_before_lastday', 'user_itemgeo_car_before_lastday',
        'user_itemgeo_star_before_lastday', 'user_itemgeo_buy_before_lastday',
        'user_itemgeo_count_lastday', 'user_itemgeo_car_lastday',
        'user_itemgeo_star_lastday', 'user_itemgeo_buy_lastday'
    ])

    for key in user_items:
        uid, tid = key.split('_')

        if tid not in item_itemgeo:
            data = ['', '', '', '', '', '', '', '', '', '']
            continue

        if item_itemgeo[tid][0] != -1:
            #data = np.zeros(8)
            temp_count_lastday = 0
            temp_star_lastday = 0
            temp_car_lastday = 0
            temp_buy_lastday = 0
            temp_count_before_lastday = 0
            temp_star_before_lastday = 0
            temp_car_before_lastday = 0
            temp_buy_before_lastday = 0
            for itemgeohash in item_itemgeo[tid]:
                key_uid_itemgeo = '%s_%s' % (uid, itemgeohash)
                # each item may map to several geohash prefixes, so average the feature over them
                temp_count_lastday += GetDict(user_itemgeo_count_lastday,
                                              key_uid_itemgeo) / len(
                                                  item_itemgeo[tid])
                temp_star_lastday += GetDict(user_itemgeo_star_lastday,
                                             key_uid_itemgeo) / len(
                                                 item_itemgeo[tid])
                temp_car_lastday += GetDict(user_itemgeo_car_lastday,
                                            key_uid_itemgeo) / len(
                                                item_itemgeo[tid])
                temp_buy_lastday += GetDict(user_itemgeo_buy_lastday,
                                            key_uid_itemgeo) / len(
                                                item_itemgeo[tid])
                temp_count_before_lastday += GetDict(
                    user_itemgeo_count_before_lastday, key_uid_itemgeo) / len(
                        item_itemgeo[tid])
                temp_star_before_lastday += GetDict(
                    user_itemgeo_star_before_lastday, key_uid_itemgeo) / len(
                        item_itemgeo[tid])
                temp_car_before_lastday += GetDict(
                    user_itemgeo_car_before_lastday, key_uid_itemgeo) / len(
                        item_itemgeo[tid])
                temp_buy_before_lastday += GetDict(
                    user_itemgeo_buy_before_lastday, key_uid_itemgeo) / len(
                        item_itemgeo[tid])
            data = [
                uid, tid, temp_count_lastday, temp_star_lastday,
                temp_car_lastday, temp_buy_lastday, temp_count_before_lastday,
                temp_star_before_lastday, temp_car_before_lastday,
                temp_buy_before_lastday
            ]
        fw.writerow(data)
        #else:

    com.FillAvgData(foutput, '%s_filled.csv' % foutput, log=True)
Example #31
def GetFeature(data):

	
	cluster = util.load_obj('%s_cluster.model' % __fname__)
	cluster_ids = cluster.predict(data[[i for i in data.keys() if i not in ['user_id','item_id','buy']]])
	
	
	data['cluster_0'] = cluster_ids==0 
	data['cluster_1'] = cluster_ids==1 
	data['cluster_2'] = cluster_ids==2
	data['cluster_3'] = cluster_ids==3
	data['cluster_4'] = cluster_ids==4
	
	data['user_is_robot'] = RobotRule(data).astype(int)
	data['item_nouser'] = NouserItem(data).astype(int)
	data['user_left_item'] = UserLeftItem(data).astype(int)
	
	data['user_high_and_active'] = ((data['user_lastday_count']>5) & (data['user_action_count']>100)).astype(int)
	data['item_is_new'] =((data['item_lastday_count']>8) & (data['item_before_halfmonth_click']==0)).astype(int)
	data['user_is_new'] = ((data['user_lastday_count']>0)&(data['user_action_count']<5*data['user_lastday_count'])).astype(int)
	
	data['user_lastday_active'] = ((data['user_lastday_count']>100)).astype(int)
	
	data['user_active_item_active_nobuy'] = ((data['user_lastday_count']>10) & (data['item_lastday_count']>10) & (data['user_item_click_nobuy'])).astype(int)
	
	data['user_like_watch'] =(data['user_action_count']/(1+data['user_lastday_count'])>20) & (data['user_buy_count']==0)
	data['user_almost_buy'] =  ((data['user_item_lastday_click_nobuy']) & (data['user_item_lastday_cart_nobuy']) & (data['user_item_lastday_star_nobuy'])).astype(int)
	
	nolog = ['user_id','item_id', 'buy']
	

	factor_features = [
		"user_high_and_active",
	"item_is_new",
	"user_is_new",
	"user_lastday_active",
	"user_active_item_active_nobuy",
	"user_like_watch",
	
	"cluster_0",
	"cluster_1",
	"cluster_2",
	"cluster_3",
	"cluster_4",
	
	
	
"cat_lastday_buy_again",
"cat_lastday_cart_nobuy",
"cat_lastday_click_nobuy",
"cat_lastday_star_nobuy",
"item_geo_4",
"item_geo_9",
"item_geo_f",
"item_geo_m",
"item_geo_t",
"item_lastday_buy_again",
"item_lastday_cart_nobuy",
"item_lastday_click_nobuy",
"item_lastday_star_nobuy",
"user_cat_lastday_buy_again",
"user_cat_lastday_cart_nobuy",
"user_cat_lastday_click_nobuy",
"user_cat_lastday_star_nobuy",
"user_geo_4",
"user_geo_5",
"user_geo_9",
"user_geo_b",
"user_geo_f",
"user_geo_i",
"user_geo_m",
"user_geo_o",
"user_geo_t",
"user_geo_v",
"user_in_hot_pos_1",
"user_in_hot_pos_2",
"user_in_hot_pos_3",
"user_in_hot_pos_4",
"user_in_hot_pos_5",
"user_in_hot_pos_6",
"user_in_hot_pos_7",
"user_item_buy_again",
"user_item_cart_nobuy",
"user_item_click_nobuy",
"user_item_lastday_buy_again",
"user_item_lastday_cart_nobuy",
"user_item_lastday_click_nobuy",
"user_item_lastday_star_nobuy",
"user_item_star_nobuy",


	"user_is_robot",
	"item_nouser",
	"user_left_item",
	]
	
	signed_log_features = [
		"user_cat_aveThreeDayDelta_add_car",
"user_cat_aveThreeDayDelta_buy",
"user_cat_aveThreeDayDelta_click",
"user_cat_aveThreeDayDelta_star",

"user_item_aveThreeDayDelta_add_car",
"user_item_aveThreeDayDelta_buy",
"user_item_aveThreeDayDelta_click",
"user_item_aveThreeDayDelta_star",

	]
	
	log_features = [
		
	"cat_add_car",
"cat_add_star",
"cat_before_halfmonth_add_car",
"cat_before_halfmonth_buy",
"cat_before_halfmonth_click",
"cat_before_halfmonth_star",
"cat_buy_count",
"cat_buy_user_number",
"cat_click_count",
"cat_halfmonth_add_car",
"cat_halfmonth_buy",
"cat_halfmonth_click",
"cat_halfmonth_star",
"cat_lastday_add_car",
"cat_lastday_buy",
"cat_lastday_click",
"cat_lastday_star",
"cat_lastweek_add_car",
"cat_lastweek_buy",
"cat_lastweek_click",
"cat_lastweek_star",
"geo_users_number",
"item_added_car",
"item_added_start",
"item_before_halfmonth_add_car",
"item_before_halfmonth_buy",
"item_before_halfmonth_click",
"item_before_halfmonth_star",
"item_buy_count",
"item_buy_user_number",
"item_click_count",
"item_halfmonth_add_car",
"item_halfmonth_buy",
"item_halfmonth_click",
"item_halfmonth_star",
"item_lastday_add_car",
"item_lastday_buy",
"item_lastday_click",
"item_lastday_count",
"item_lastday_star",
"item_lastweek_add_car",
"item_lastweek_buy",
"item_lastweek_click",
"item_lastweek_star",
"user_action_count",
"user_add_car",
"user_add_star",
"user_buy_cat_number",
"user_buy_count",
"user_buy_item_number",

"user_cat_before_halfmonth_add_car",
"user_cat_before_halfmonth_buy",
"user_cat_before_halfmonth_click",
"user_cat_before_halfmonth_star",
"user_cat_count",
"user_cat_halfmonth_add_car",
"user_cat_halfmonth_buy",
"user_cat_halfmonth_click",
"user_cat_halfmonth_star",
"user_cat_lastday_add_cart",
"user_cat_lastday_add_star",
"user_cat_lastday_buy",
"user_cat_lastday_count",
"user_cat_lastweek_add_car",
"user_cat_lastweek_buy",
"user_cat_lastweek_click",
"user_cat_lastweek_star",
"user_item_before_halfmonth_add_car",
"user_item_before_halfmonth_buy",
"user_item_before_halfmonth_click",
"user_item_before_halfmonth_star",
"user_item_buy",

"user_item_count",

"user_item_halfmonth_add_car",
"user_item_halfmonth_buy",
"user_item_halfmonth_click",
"user_item_halfmonth_star",
"user_item_lastday_add_cart",
"user_item_lastday_add_star",
"user_item_lastday_buy",
"user_item_lastday_count",
"user_item_lasttime",
"user_item_lastweek_add_car",
"user_item_lastweek_buy",
"user_item_lastweek_click",
"user_item_lastweek_star",

"user_lastday_add_cart",
"user_lastday_add_star",
"user_lastday_buy",
"user_lastday_count",
"usergeo_item_before_lastday_buy",
"usergeo_item_before_lastday_cart",
"usergeo_item_before_lastday_click",
"usergeo_item_before_lastday_star",
"usergeo_item_lastday_buy",
"usergeo_item_lastday_cart",
"usergeo_item_lastday_click",
"usergeo_item_lastday_star",
	]
	
	linear_features = [
		"user_item_geo_distance",
	]
	X1 = np.log(.3+data[log_features])
	X2 = data[factor_features]
	X3 = data[linear_features]
	X4 = np.copysign(np.log(.3+np.abs(data[signed_log_features])),np.sign(data[signed_log_features]))
	
	X = pandas.concat([X1, X2,X3, X4], axis=1)
	
	
	# transformer = sklearn.preprocessing.MinMaxScaler()
	# X = transformer.fit_transform(X[_feature_names])

	return X[_feature_names]
Example #32
import hashlib
import os
import sys

utils_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
utils_dir = os.path.join(utils_dir, 'utils')
sys.path.append(utils_dir)

import util  # noqa: E402

parent_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))

cfd = os.path.join(parent_path, 'data', 'intermediate', 'class_file_dirs')
wfd = os.path.join(parent_path, 'data', 'intermediate', 'write_file_dirs')
class_file_dirs = util.load_obj(cfd)
write_file_dirs = util.load_obj(wfd)

class_file_hashes = []
write_file_hashes = []

count = 0
for tup in class_file_dirs:
    if count % 100000 == 0:
        print('hashed %d class images' % count)

    (cclass, cfile) = tup
    file_path = os.path.join(parent_path, cfile)

    chash = hashlib.md5(open(file_path, 'rb').read()).hexdigest()

    class_file_hashes.append((cclass, cfile, chash))
# Created:     03/03/2015
# Copyright:   (c) Animesh 2015
# Licence:     <your licence>
# -------------------------------------------------------------------------------

from __future__ import division
import itertools, constants, getTextFromDoc, util, tokenizer, re
from collections import Counter
import os, json
from stemming.porter2 import stem
from datetime import datetime
from time import time
import termStats
import sys, shutil, StringIO

DOC_ID_MAP = util.load_obj('DOC_ID_MAP.pkl')

vocab = set()
v = open(os.path.join(constants.TEMP_DIR, 'vocab.dat'), "w")
doc_len_map = dict()
total_tokens = 0
no_of_docs = 0


def termPositions(lst, element):
    result = []
    offset = -1
    while True:
        try:
            offset = lst.index(element, offset + 1)
        except ValueError:
Example #34
                    fast_parameters.append(weight.fast)
            loss_all.append(F.l1_loss(y_qry[i], model(x_qry[i])).item())
    loss_all = np.array(loss_all)
    print('{}+/-{}'.format(np.mean(loss_all), 1.96 * np.std(loss_all, 0) /
                           np.sqrt(len(loss_all))))


if __name__ == '__main__':
    args = parse_args()
    if not args.test:
        run(args,
            num_workers=1,
            log_interval=100,
            verbose=True,
            save_path=None)
    else:
        utils.set_seed(args.seed)
        code_root = os.path.dirname(os.path.realpath(__file__))
        mode_path = utils.get_path_from_args(args)
        mode_path = '9b8290dd3f63cbafcd141ba21282c783'
        path = '{}/{}_result_files/'.format(code_root, args.task) + mode_path
        logger = utils.load_obj(path)
        model = logger.valid_model[-1]
        dataloader_test = DataLoader(
            Metamovie(args, partition='test',
                      test_way='old'),  #old, new_user, new_item, new_item_user
            batch_size=1,
            num_workers=args.num_workers)
        evaluate_test(args, model, dataloader_test)
    # --- settings ---
Example #35
    returns:
    - 0 through 9 for classes representing respective numbers
    - 10 through 35 for classes representing respective uppercase letters
    - 36 through 61 for classes representing respective lowercase letters
    '''
    if c.isdigit() and int(c) < 40:
        return (int(c) - 30)
    elif int(c, 16) <= 90: # uppercase
        return (int(c, 16) - 55)
    else:
        return (int(c, 16) - 61)
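A small worked check of the mapping described in the docstring, assuming the truncated function above is named relabel_class (the name is a guess) and receives the character's hexadecimal ASCII code as a string:

# '30'-'39' ('0'-'9') -> 0-9, '41'-'5a' ('A'-'Z') -> 10-35, '61'-'7a' ('a'-'z') -> 36-61
assert relabel_class('30') == 0    # digit '0'
assert relabel_class('41') == 10   # uppercase 'A'
assert relabel_class('61') == 36   # lowercase 'a'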

parent_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))

by_writer_dir = os.path.join(parent_path, 'data', 'intermediate', 'images_by_writer')
writers = util.load_obj(by_writer_dir)

num_json = int(math.ceil(len(writers) / MAX_WRITERS))

users = [[] for _ in range(num_json)]
num_samples = [[] for _ in range(num_json)]
user_data = [{} for _ in range(num_json)]

writer_count = 0
json_index = 0
for (w, l) in writers:

    users[json_index].append(w)
    num_samples[json_index].append(len(l))
    user_data[json_index][w] = {'x': [], 'y': []}
Example #36
import os
import sys

utils_dir = os.path.dirname(
    os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
utils_dir = os.path.join(utils_dir, 'utils')
sys.path.append(utils_dir)

import util

parent_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))

cfhd = os.path.join(parent_path, 'data', 'intermediate', 'class_file_hashes')
wfhd = os.path.join(parent_path, 'data', 'intermediate', 'write_file_hashes')
class_file_hashes = util.load_obj(cfhd)  # each elem is (class, file dir, hash)
write_file_hashes = util.load_obj(
    wfhd)  # each elem is (writer, file dir, hash)

class_hash_dict = {}
for i in range(len(class_file_hashes)):
    (c, f, h) = class_file_hashes[len(class_file_hashes) - i - 1]
    class_hash_dict[h] = (c, f)

write_classes = []
for tup in write_file_hashes:
    (w, f, h) = tup
    write_classes.append((w, f, class_hash_dict[h][0]))

wwcd = os.path.join(parent_path, 'data', 'intermediate', 'write_with_class')
util.save_obj(write_classes, wwcd)
Example #37
        cnt = 0
        isTooFast = False
        isTooSlow = False
        for seq in stat[key]['Sequences']:
            duration = seq[1] - seq[0]
            distance = util.dis(seq[2], seq[4], seq[3], seq[5])
            if duration != 0: speed = distance / duration
            else: speed = 0
            if duration >= minSequenceDuration: cnt += 1
            if speed > maxSpeed: isTooFast = True
            if speed < minSpeed: isTooSlow = True
            # print(distance/1000, duration/1000/60, speed / 1000 * 1000 * 60 * 60)
        if cnt < minSequenceCnt: continue
        if isTooFast or isTooSlow: continue

        # decide to add the key
        filteredPlane[key] = 1
        longFlightCnt += 1

    print("remain planes cnt = ", longFlightCnt)


# filter planes
print('filter planes')
stat = util.load_obj(os.path.join(conf["output_folder"], 'stat'))
# print (stat)
filterPlanes(stat, filteredPlane)

with open(os.path.join(conf["output_folder"], 'filtered-plane.json'),
          'w') as f:
    f.write(json.dumps(filteredPlane))
def train_network(model, game_state, observe=False):
    last_time = time.time()
    # store the previous observations in replay memory
    D = load_obj("D")  # load from file system
    # get the first state by doing nothing
    do_nothing = np.zeros(ACTIONS)
    do_nothing[0] = 1  # 0 => do nothing,
    # 1=> jump

    x_t, r_0, terminal = game_state.get_state(
        do_nothing)  # get next step after performing the action

    s_t = np.stack((x_t, x_t, x_t, x_t),
                   axis=2)  # stack 4 images to create placeholder input

    s_t = s_t.reshape(1, s_t.shape[0], s_t.shape[1], s_t.shape[2])  # 1*20*40*4

    initial_state = s_t

    if observe:
        OBSERVE = 999999999  # We keep observe, never train
        epsilon = FINAL_EPSILON
        print("Now we load weight")
        model.load_weights("model.h5")
        adam = Adam(lr=LEARNING_RATE)
        model.compile(loss='mse', optimizer=adam)
        print("Weight load successfully")
    else:  # We go to training mode
        OBSERVE = OBSERVATION
        epsilon = load_obj("epsilon")
        model.load_weights("model.h5")
        adam = Adam(lr=LEARNING_RATE)
        model.compile(loss='mse', optimizer=adam)

    t = load_obj(
        "time")  # resume from the previous time step stored in file system
    while True:  # endless running

        loss = 0
        Q_sa = 0
        action_index = 0
        a_t = np.zeros([ACTIONS])  # action at t

        # choose an action epsilon greedy
        if t % FRAME_PER_ACTION == 0:  # parameter to skip frames for actions
            if random.random() <= epsilon:  # randomly explore an action
                print("----------Random Action----------")
                action_index = random.randrange(ACTIONS)
                a_t[0] = 1
            else:  # predict the output
                q = model.predict(
                    s_t)  # input a stack of 4 images, get the prediction
                max_Q = np.argmax(q)  # choose index with maximum q value
                action_index = max_Q
                a_t[action_index] = 1  # 0=> do nothing, 1=> jump

        # We reduced the epsilon (exploration parameter) gradually
        if epsilon > FINAL_EPSILON and t > OBSERVE:
            epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE

            # run the selected action and observed next state and reward
        x_t1, r_t, terminal = game_state.get_state(a_t)
        print('fps: {0}'.format(1 / (time.time() - last_time)))  # helpful for measuring frame rate
        last_time = time.time()
        x_t1 = x_t1.reshape(1, x_t1.shape[0], x_t1.shape[1], 1)  # 1x20x40x1
        s_t1 = np.append(
            x_t1, s_t[:, :, :, :3], axis=3
        )  # append the new image to input stack and remove the first one

        # store the transition in D
        D.append((s_t, action_index, r_t, s_t1, terminal))
        if len(D) > REPLAY_MEMORY:
            D.popleft()

        # only train if done observing
        if t > OBSERVE:

            # sample a mini_batch to train on
            mini_batch = random.sample(D, BATCH)
            inputs = np.zeros((BATCH, s_t.shape[1], s_t.shape[2],
                               s_t.shape[3]))  # 32, 20, 40, 4
            targets = np.zeros((inputs.shape[0], ACTIONS))  # 32, 2

            # Now we do the experience replay
            for i in range(0, len(mini_batch)):
                state_t = mini_batch[i][0]  # 4D stack of images
                action_t = mini_batch[i][1]  # This is action index
                reward_t = mini_batch[i][
                    2]  # reward at state_t due to action_t
                state_t1 = mini_batch[i][3]  # next state
                terminal = mini_batch[i][
                    4]  # whether the agent died or survived due to the action

                inputs[i:i + 1] = state_t

                targets[i] = model.predict(state_t)  # predicted q values
                Q_sa = model.predict(
                    state_t1)  # predict q values for next step

                if terminal:
                    targets[
                        i,
                        action_t] = reward_t  # if terminated, only equals reward
                else:
                    targets[i, action_t] = reward_t + GAMMA * np.max(Q_sa)

            loss += model.train_on_batch(inputs, targets)
            loss_df.loc[len(loss_df)] = loss
            q_values_df.loc[len(q_values_df)] = np.max(Q_sa)

        s_t = initial_state if terminal else s_t1  # reset game to initial frame if terminate
        t = t + 1

        # save progress every 1000 iterations
        if t % 1000 == 0:
            print("Now we save model")
            game_state._game.pause()  # pause game while saving to filesystem
            model.save_weights("model.h5", overwrite=True)
            save_obj(D, "D")  # saving episodes
            save_obj(t, "time")  # caching time steps
            save_obj(epsilon, "epsilon"
                     )  # cache epsilon to avoid repeated randomness in actions
            loss_df.to_csv("./objects/loss_df.csv", index=False)
            scores_df.to_csv("./objects/scores_df.csv", index=False)
            actions_df.to_csv("./objects/actions_df.csv", index=False)
            q_values_df.to_csv(q_value_file_path, index=False)
            with open("model.json", "w") as outfile:
                json.dump(model.to_json(), outfile)
            clear_output()
            game_state._game.resume()
        # print info
        if t <= OBSERVE:
            state = "observe"
        elif t <= OBSERVE + EXPLORE:
            state = "explore"
        else:
            state = "train"

        print "TIMESTAMP", t, "/ STATE", state, "/ EPSILON", epsilon, "/ ACTION"\
            , action_index, "/ REWARD", r_t, "/ Q_MAX ", np.max(Q_sa), "/ Loss ", loss