def run(dest, results_path):
    # open up the database
    store = pd.HDFStore(
        os.path.abspath(os.path.join(results_path, 'model_fall_responses.h5')))

    sigma, phi = util.get_params()

    all_data = pd.DataFrame([])
    for query in store.root._v_children:

        # look up the name of the key for the parameters that we want (will be
        # something like params_0)
        params = store["/{}/param_ref".format(query)]\
            .reset_index()\
            .set_index(['sigma', 'phi'])['index']\
            .ix[(sigma, phi)]

        # load in the data
        data = store["{}/{}".format(query, params)]
        all_data = all_data.append(data)

    all_data = all_data\
        .set_index(['query', 'block', 'stimulus', 'kappa0'])\
        .sortlevel()

    store.close()
    all_data.to_csv(dest)
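Note on the parameter lookup above: the .ix indexer (along with DataFrame.append and sortlevel, which this snippet also relies on) has been removed from recent pandas releases. A minimal sketch of the same param_ref lookup with .loc, assuming the HDFStore layout shown above ('/<query>/param_ref' with sigma and phi columns):

# Hypothetical .loc-based equivalent of the deprecated .ix lookup (pandas >= 1.0).
param_ref = store["/{}/param_ref".format(query)]
params = param_ref\
    .reset_index()\
    .set_index(['sigma', 'phi'])['index']\
    .loc[(sigma, phi)]

On the same pandas versions, the append calls can be replaced with pd.concat and sortlevel() with sort_index().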
Example #2
def svmvec(path, output_filename):
    conn = sqlite3.connect(path)
    c = conn.cursor()
    c_inner = conn.cursor()
    c_inner2 = conn.cursor()

    params = util.get_params(c, path)

    c.execute('SELECT COUNT(ED_ENC_NUM) FROM Documents')
    num_total_docs = int(c.fetchone()[0])

    c.execute('select ED_ENC_NUM, Score from Documents')
    i = 1
    with open(output_filename, 'w') as fout_samples:
        with open(output_filename + ".id", 'w') as fout_ids:
            for doc_id, score in c:
                if i % 100 == 0:
                    print(('svmvec(): processing document %s (%d/%d)' %
                           (str(doc_id), i, num_total_docs)))
                c_inner.execute(
                    """SELECT DocumentsToDimensions.DimensionId, Count
                        FROM DocumentsToDimensions INNER JOIN Dimensions
                        ON DocumentsToDimensions.DimensionId = Dimensions.DimensionId
                        WHERE DocumentsToDimensions.ED_ENC_NUM = ?
                        AND Dimensions.Exclude = 0
                        AND Count > 0""", (doc_id, ))
                if score is None:
                    score = 0
                elif score > 100:
                    score = 100
                elif score < -100:
                    score = -100
                assert -100 <= score <= 100
                print('%d' % (score / 100), end=' ', file=fout_samples)
                print(doc_id, file=fout_ids)
                for dim_id, count in c_inner:
                    c_inner2.execute(
                        """SELECT IDF FROM Dimensions
                        WHERE DimensionId = ?""", (dim_id, ))
                    idf = float(c_inner2.fetchone()[0])

                    #
                    # The SELECT statement above protects us from zero count.
                    #
                    tfidf = 1 + log10(count) * idf
                    if params['USE_BINARIZED_TDF']:
                        tfidf = 1 if tfidf > float(params['C_BINARIZE']) else 0
                    print('%d:%d' % (dim_id, tfidf),
                          end=' ',
                          file=fout_samples)
                print(file=fout_samples)
                i += 1

    c_inner.close()
    c.close()
    conn.close()
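The loop above writes one training line per document in the sparse "label dimension:value" format consumed by SVM-light (the learn() and classify() examples below feed these files to SVM_LEARN / SVM_CLASSIFY), with the matching ED_ENC_NUM written to the parallel ".id" file. An illustrative, made-up excerpt of the two output files:

# fout_samples (label followed by dimension:tfidf pairs, truncated to ints by '%d'):
#   1 12:1 57:2 103:1
#   -1 7:1 12:3
# fout_ids (one ED_ENC_NUM per line, same order):
#   4711
#   4712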
Example #3
File: trainer.py Project: tkd26/RCM
    def __init__(self, config):
        """Initialize Trainer

        Args:
            config (dict): Configuration dictionary
        """
        super(Trainer, self).__init__()

        # Define multi-task setting
        dataset = config['dataset']
        dataset_name = dataset['dataset_name']
        self.tasks_weighting = dataset['tasks_weighting']
        self.tasks = [k for k, v in self.tasks_weighting.items()]

        # Setup network
        model_config = config['model']
        self.model = get_module(model_config, dataset_name, self.tasks)
        print('Model constructed for {}'.format(' '.join(self.tasks)))

        if 'grouping' in model_config:
            print('groups = {}'.format(model_config['grouping']['groups']))
            print('grouping method = {}'.format(model_config['grouping']['method']))
            self.model = update_module(config, self.model, self.tasks)

        # Setup for a task-conditional setting
        model_params = config['model']['parameters']
        if 'common_mt_params' in model_params:
            self.task_conditional = not model_params['common_mt_params']
        else:
            self.task_conditional = False

        # Setup optimizers
        optimizer_config = config['optimizer']
        optimizer_cls = get_optimizer(optimizer_config['algorithm'])
        model_params = get_params(self.model, optimizer_config['parameters']['lr'], len(self.tasks),
                                  self.task_conditional, self.tasks)
        self.optimizer = optimizer_cls(model_params, **optimizer_config['parameters'])

        # Setup schedulers
        scheduler_config = config['scheduler']
        scheduler_cls = get_scheduler(scheduler_config['lr_policy'])
        self.scheduler = scheduler_cls(self.optimizer, **scheduler_config['parameters'])

        # Setup loss function
        losses_config = config['loss']
        self.criterions = get_loss_functions(self.tasks, losses_config)

        # Initialise performance meters
        self.best_val_loss = 1e9
        self.train_loss = {}
        self.val_loss = {}
        for task in self.tasks:
            self.train_loss[task] = get_running_meter()
            self.val_loss[task] = get_running_meter()

        # Initialize img logging for visualization
        self.img_logging = get_img_logging(dataset_name, self.tasks)
        self.pred_decoder = get_pred_decoder(dataset_name, self.tasks)
Example #4
def classify(path, svm_test, svm_classifier, test_results, temporary_dir):
    tmp_file = P.join(temporary_dir, 'svm_classify.txt')
    ids_file = P.join(temporary_dir, 'test-samples.dat.id')
    conn = sqlite3.connect(path)
    p = sub.Popen([SVM_CLASSIFY, svm_test, svm_classifier, tmp_file],
                  stderr=sub.PIPE,
                  stdout=sub.PIPE)
    stdout, stderr = p.communicate()
    if stderr:
        print(stderr, file=sys.stderr)
        return

    precision, recall = -1, -1
    for line in stdout.decode().split('\n'):
        print(line)
        match = REGEX.match(line.strip())
        if match:
            precision, recall = match.groups()

    c = conn.cursor()
    params = util.get_params(c, path)
    scores = [
        int(float(f) / params['CLASSIFY_CLIP'] * 100)
        for f in open(tmp_file).read().strip().split('\n')
    ]

    if test_results:
        assert precision >= 0 and recall >= 0
        print('precision:', precision, 'recall:', recall)
        c.execute('SELECT ED_ENC_NUM, Score FROM Documents')
        for (i, (doc_id, score)) in enumerate(c):
            if i >= len(scores):
                print('Premature end of training file', file=sys.stderr)
                assert False
            if score < 0 and scores[i] < 0:
                continue
            elif score > 0 and scores[i] > 0:
                continue
            print(doc_id, 'expected:', score, 'actual:', scores[i])
    else:
        c_inner = conn.cursor()
        c.execute('SELECT ED_ENC_NUM FROM Documents')
        i = 0
        for doc_id in open(ids_file).read().strip().split('\n'):
            if i >= len(scores):
                print('Premature end of training file', file=sys.stderr)
                assert False
            c_inner.execute(
                'UPDATE Documents SET Score = ? WHERE ED_ENC_NUM = ?',
                (scores[i], doc_id))
            i = i + 1
        c_inner.close()

    c.close()
    conn.commit()
Example #5
	def __init__(self, fimage=None, location="LaSilla"):
		
		# Todo: load ALL PARAMS
		self.location = location
		self.params = util.get_params(location)
		
		if fimage is None:
			fimage = "current.JPG"
			self.fimage = fimage
			self.retrieve_image()
		else:
			self.fimage = fimage
			
		self.im_masked, self.im_original = util.loadallsky(fimage, return_complete=True)
		self.mask = util.get_mask(self.im_original)
		self.observability_map = None
Example #6
 def __init__(self):
     param_list = util.get_params()
     self.project_name = param_list[1]
     self.project_list = util.get_comma_seprated_list(param_list[2])
     self.svn_code_repository = param_list[3]
     self.svn_username = param_list[4]
     self.svn_password = param_list[5]
     self.svn_auth = ' --username ' + self.svn_username + ' --password ' + self.svn_password
     print('svn_auth: ' + self.svn_auth)
     self.workspace = os.getcwd()
     self.war_name = self.project_name + FORMATE_WAR
     print('war name is :' + self.war_name)
     self.conf_dir = util.get_conf_dir(self.project_name)
     print('conf_dir is :' + self.conf_dir)
Example #7
def learn(path, training, svm_classifier):
    conn = sqlite3.connect(path)
    c = conn.cursor()
    param = util.get_params(c, path)
    if param['SVM_LEARN']:
        options = param['SVM_LEARN'].split(' ')
    else:
        options = []

    cmdline = [SVM_LEARN] + options + [training, svm_classifier]
    p = sub.Popen(cmdline, stderr=sub.PIPE, stdout=sub.PIPE)
    stdout, stderr = p.communicate()
    if stderr:
        print(stderr, file=sys.stderr)
        return

    c.close()
Example #8
def main(self):
    (train_data, train_label, train_seq_len), (dev_data, dev_label, dev_seq_len), (test_data, test_label, test_seq_len), _, _ = dp.data_preprocess()
    params = get_params()
    
    if DEFINES.train:
        check_and_create_path()
        
        estimator = tf.estimator.Estimator(
            model_fn=model.model_fn,
            model_dir=DEFINES.ckpt_path,
            params=params,
            config=tf.estimator.RunConfig(
                save_checkpoints_steps=30,
                save_summary_steps=1,
                log_step_count_steps=10))


        train_spec = tf.estimator.TrainSpec(
            input_fn=lambda:dp.train_input_fn(
                train_data, train_seq_len, train_label, DEFINES.batch_size
            ), max_steps=DEFINES.train_step)

        eval_spec = tf.estimator.EvalSpec(
            input_fn=lambda: dp.eval_input_fn(
                dev_data, dev_seq_len, dev_label, len(dev_data)
            ), exporters = [BestCheckpointsExporter()], start_delay_secs=0, throttle_secs=0)

        tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

        print('Training finished')
        
    print('Evaluate testset')
    
    assert glob.glob(os.path.join(DEFINES.best_ckpt_path, '*.ckpt*')), 'Checkpoint does not exist'
    

    estimator = tf.estimator.Estimator(
        model_fn=model.model_fn,
        model_dir=DEFINES.best_ckpt_path,
        params=params)

    test_result = estimator.evaluate(input_fn=lambda: dp.eval_input_fn(
        test_data, test_seq_len, test_label, len(test_data)))
    
    print('\nEVAL set accuracy: {accuracy:0.3f}\n'.format(**test_result))
Example #9
def main():
    elastic = Elastic()
    config = get_params()
    path = config['crawler']['path']

    # download imdb files
    wget.download(BASICS, out=path)
    wget.download(RATINGS, out=path)
    wget.download(EPISODES, out=path)

    # crawl movies and add to elastic
    crawl()
    elastic.insert_elastic()

    # remove files
    os.remove(f"{config['crawler']['path']}/title.basics.tsv.gz")
    os.remove(f"{config['crawler']['path']}/title.ratings.tsv.gz")
    os.remove(f"{config['crawler']['path']}/title.episode.tsv.gz")
Example #10
from db.factory import BasicFactory
from elasticsearch import Elasticsearch, helpers, exceptions
from util import get_params, set_logger
import json

bf = BasicFactory()
config = get_params()
logger = set_logger('elastic')

es = Elasticsearch(hosts=[{
    'host': config['elastic']['host'],
    'port': config['elastic']['port']
}])


class Elastic:
    INDEX = config['elastic']['index']

    def search(self, name):
        """
        Autocomplete for movies

        :param str name: prefix from search
        :return: movies
        :rtype: dict
        """
        message = {'status': False}
        query = {
            'suggest': {
                'movie': {
                    'prefix': name,
Example #11
    def on_threadSample_newSample(self, sample):
        observability = sample[1]
        rest = sample[0]

        self.matplotlibWidget.figure.tight_layout()

        self.matplotlibWidget.axis.imshow(rest,
                                          vmin=0,
                                          vmax=255,
                                          cmap=plt.get_cmap('Greys_r'))
        self.matplotlibWidget.axis.imshow(observability,
                                          cmap=plt.get_cmap('RdYlGn'),
                                          alpha=0.2)

        #theta_coordinates = np.deg2rad([-146,0,45,90,0,180,170,190,200,0, 270, 315])
        theta_coordinates = np.deg2rad(np.arange(0, 360, 15))

        params = util.get_params(location="LaSilla")

        ff = params['ff']
        k1 = params['k1']
        k2 = params['k2']
        r0 = params['r0']
        cx = params['cx']
        cy = params['cy']
        north = params['north']
        deltatetha = params['deltatetha']
        url_weather = params['url_weather']
        wpl = params['wind_pointing_limit']
        wsl = params['wind_stopping_limit']

        coordinatesx = np.cos(north + theta_coordinates) * r0 + cx
        coordinatesy = np.sin(north + theta_coordinates) * r0 + cy

        northx, northy = util.get_image_coordinates(np.deg2rad(0),
                                                    np.deg2rad(24))
        eastx, easty = util.get_image_coordinates(np.deg2rad(90),
                                                  np.deg2rad(20))

        self.matplotlibWidget.axis.annotate('N',
                                            xy=(northx, northy),
                                            rotation=deltatetha,
                                            horizontalalignment='center',
                                            verticalalignment='center')

        self.matplotlibWidget.axis.annotate('E',
                                            xy=(eastx, easty),
                                            rotation=deltatetha,
                                            horizontalalignment='center',
                                            verticalalignment='center')

        altshow = [15, 30, 45, 60, 75, 90]
        for angle in np.deg2rad(altshow):
            rr = util.get_radius(angle, ff, k1, k2, r0)

            #if angle >= np.pi/2: print rr/330.
            self.matplotlibWidget.figure.gca().add_artist(
                plt.Circle((cx, cy), rr, color='k', fill=False))

            textx = np.cos(north + np.deg2rad(180)) * (rr - 2) + cx
            texty = np.sin(north + np.deg2rad(180)) * (rr - 2) + cy
            self.matplotlibWidget.axis.annotate(
                '%d' % (90 - np.ceil(np.rad2deg(angle))),
                xy=(textx, texty),
                rotation=deltatetha,  #prefered_direction['dir'],
                horizontalalignment='left',
                verticalalignment='center',
                size=10)

        WD, WS = get_wind(url_weather)
        WDd = WD
        WD = np.deg2rad(WD)

        if WS is not None and WS > wpl:
            wdcoordinatesx = np.cos(north - WD) * r0 + cx
            wdcoordinatesy = np.sin(north - WD) * r0 + cy
            Nd = np.rad2deg(north)  # + 90.

            if WS > wsl:
                cw = 'r'
                self.matplotlibWidget.axis.add_patch(
                    Wedge([cx, cy],
                          r0,
                          Nd - WDd,
                          Nd - WDd + 360,
                          fill=False,
                          hatch='//',
                          edgecolor=cw))
                self.matplotlibWidget.axis.annotate(
                    'WIND LIMIT\nREACHED',
                    xy=(cx, cy),
                    rotation=0,
                    horizontalalignment='center',
                    verticalalignment='center',
                    color=cw,
                    fontsize=35)
            elif WS > wpl:
                cw = 'darkorange'
                wtcoordinatesx = np.cos(north - WD) * r0 / 2. + cx
                wtcoordinatesy = np.sin(north - WD) * r0 / 2. + cy

                self.matplotlibWidget.axis.add_patch(
                    Wedge([cx, cy],
                          r0,
                          -90 + Nd - WDd,
                          90 + Nd - WDd,
                          fill=False,
                          hatch='//',
                          edgecolor=cw))
                self.matplotlibWidget.axis.annotate(
                    'Pointing limit!',
                    xy=(wtcoordinatesx, wtcoordinatesy),
                    rotation=0,
                    horizontalalignment='center',
                    verticalalignment='center',
                    color=cw,
                    fontsize=25)

            self.matplotlibWidget.axis.plot([cx, wdcoordinatesx],
                                            [cy, wdcoordinatesy],
                                            lw=3,
                                            color=cw)

        #plt.plot([cx, northx], [cy, northy], lw=2, color='k')
        for ccx, ccy in zip(coordinatesx, coordinatesy):
            self.matplotlibWidget.axis.plot([cx, ccx], [cy, ccy],
                                            lw=1,
                                            color='k')
        self.matplotlibWidget.axis.set_ylim([np.shape(rest)[0], 0])
        self.matplotlibWidget.axis.set_xlim([0, np.shape(rest)[1]])

        self.matplotlibWidget.axis.set_axis_off()
        self.matplotlibWidget.canvas.draw()
Example #12
                        #print(info)
                        agent.send(info.encode())
            except IOError as e:
                if (e.errno == errno.EWOULDBLOCK):
                    pass


def create_rfid_status():
    rfid_status = {}
    for tag, position in card_id_dict.items():
        rfid_status[position] = 0
    return rfid_status


if __name__ == "__main__":
    params = get_params(sys.argv)
    #download_host = params.get("download_host")
    #download_port = params.get("download_port")
    ip = params.get("ip")
    port = int(params.get("port"))
    # load from db
    card_id_dict = load_card_ids()
    rfid_status = create_rfid_status()
    cars_position = {}
    agents = []
    my_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    bind_connection()
    Thread(target=accept_connection).start()
    Thread(target=receive_request).start()
    #receive_request()
Example #13
train_feat = train.drop(train_columns_to_drop, axis=1) 
test_feat = test.drop(test_columns_to_drop, axis=1) 
factorize_category(train_feat)
train_feat.fillna(nan, inplace=True)
test_feat.fillna(nan, inplace=True)


train_feat_final = train_feat


xgtrain = xgb.DMatrix(train_feat_final, train['target'].values)


# grid search
params = get_params()
params["eta"] = 0.01

min_child_weight_list = [1]
subsample_list = [1]
colsample_bytree_list = [0.6]
max_depth_list = [10]

#min_child_weight_list = [1, 5, 10]
#subsample_list = [0.6, 0.8, 1]
#colsample_bytree_list = [0.6, 0.8, 1]
#max_depth_list = [8, 10, 12]
params_list = []
for min_child_weight in min_child_weight_list:
    for subsample in subsample_list:
        for colsample_bytree in colsample_bytree_list:
Example #14
def index(filename, options):
    """
    Perform indexing.  Each document is stemmed, and then the non-excluded
    dimensions are counted for that document.  The result is put into the
    DocumentsToDimensions table.
    """
    conn = sqlite3.connect(filename)
    c = conn.cursor()
    params = util.get_params(c, filename)
    stemmer = params['stemmer']
    print 'index(): stemmer: %s' % stemmer

    stemmers = { 'porter' : nltk.PorterStemmer(),
            'lancaster' : nltk.LancasterStemmer() }
    try:
        stemmer = stemmers[stemmer]
    except KeyError:
        print 'unsupported stemmer:', stemmer
        return 1

    all_dim = util.get_dimensions(c, 0)
    assert all_dim, "You must calculate dimensions prior to indexing."

    all_include = util.get_all_include_regex(c)

    c.execute('SELECT COUNT(ED_ENC_NUM) FROM Documents')
    num_total_docs = int(c.fetchone()[0])

    c.execute('DELETE FROM DocumentsToDimensions')

    c.execute("SELECT COUNT(*) FROM Dimensions WHERE PartOfSpeech = 'bigram'")
    nBigrams = int(c.fetchone()[0])
    print 'Number of bigrams: ', nBigrams
    do_bigrams = nBigrams > 0

    c.execute("SELECT COUNT(*) FROM Dimensions WHERE PartOfSpeech = 'trigram'")
    nTrigrams = int(c.fetchone()[0])
    print 'Number of trigrams: ', nTrigrams
    do_trigrams = nTrigrams > 0

    #
    # If the POS column contains "unigram", then it means we didn't perform POS tagging when calculating dimensions.
    #
    c.execute("SELECT COUNT(*) FROM Dimensions WHERE PartOfSpeech = 'unigram'")
    pos_tag = int(c.fetchone()[0]) == 0

    cmd = 'SELECT ED_ENC_NUM FROM Documents'
    if options.limit:
        cmd += ' LIMIT %d' % options.limit
        num_total_docs = min(options.limit, num_total_docs)
    #
    # TODO: why is fetchmany not working?
    #
    #document_ids = c.execute(cmd).fetchmany()
    document_ids = []
    for row in c.execute(cmd):
        document_ids.append(row[0])
    print "fetched %d document ids" % len(document_ids)
    num_batches = int(math.ceil(len(document_ids) / float(options.batch_size)))

    #
    # Set up multiprocessing.
    #
    # MAIN_PROCESS -> document_id_queue -> WORKER_PROCESSES
    #
    # Each worker subprocess reads a document from the SQL database, processes it, and writes back to the database..
    #
    document_id_queue = multiprocessing.Queue()
    proc_queue = multiprocessing.Queue()
    for i in xrange(num_batches):
        start = i*options.batch_size
        end = start+options.batch_size
        document_id_queue.put(Batch(start, document_ids[start:end]))
    for i in range(options.subprocesses):
        document_id_queue.put(None)

    #
    # Terminate the SQL connection so that the subprocesses can use it.
    #
    conn.commit()
    conn.close()

    #
    # https://docs.python.org/2/library/array.html#module-array
    #
    counter = multiprocessing.Value("I")

    pr_list = []
    for i in range(options.subprocesses):
        args = (document_id_queue, filename, stemmer, all_include, pos_tag, do_bigrams, do_trigrams, all_dim, counter)
        p = multiprocessing.Process(target=worker_subprocess, args=args)
        p.start()
        pr_list.append(p)

    #
    # Wait for all worker subprocesses to complete.
    #
    for i, p in enumerate(pr_list):
        p.join()

    #
    # Calculate IDF weighting.
    #
    conn = sqlite3.connect(filename)
    c = conn.cursor()
    for dim_id, _, _ in all_dim:
        c.execute("""SELECT COUNT(DimensionId)
                FROM DocumentsToDimensions
                WHERE DimensionId = ?""", (dim_id,))
        freq = int(c.fetchone()[0])
        idf = log10(float(num_total_docs) / (1 + freq))
        c.execute(
                'UPDATE Dimensions SET IDF = ? WHERE DimensionId = ?',
                (idf, dim_id))

    #
    # Save and exit.
    #
    conn.commit()
    c.close()
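worker_subprocess itself is not part of this example; a minimal sketch of a worker loop compatible with the queue protocol above (one Batch per item, one None sentinel per subprocess), assuming Batch keeps the document ids it was constructed with and that the actual per-document indexing lives elsewhere:

import sqlite3

def worker_subprocess(document_id_queue, filename, stemmer, all_include,
                      pos_tag, do_bigrams, do_trigrams, all_dim, counter):
    # Each worker opens its own connection; the parent closed its connection
    # before starting the subprocesses for exactly this reason.
    conn = sqlite3.connect(filename)
    c = conn.cursor()
    while True:
        batch = document_id_queue.get()
        if batch is None:  # sentinel pushed by the parent, one per worker
            break
        for doc_id in batch.document_ids:  # hypothetical Batch attribute
            # ... stem the document, count the non-excluded dimensions, and
            # write the counts into DocumentsToDimensions for this doc_id ...
            with counter.get_lock():
                counter.value += 1
    conn.commit()
    conn.close()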
Example #15
test_feat = test.drop(test_columns_to_drop, axis=1)
factorize_category(train_feat)

# handle numeric nan
train_feat.fillna(nan, inplace=True)
test_feat.fillna(nan, inplace=True)
# handle numeric nan
train_feat.replace(np.inf, -999, inplace=True)
test_feat.replace(np.inf, -999, inplace=True)

train_feat_final = train_feat

xgtrain = xgb.DMatrix(train_feat_final, train['target'].values)

# grid search
params = get_params()
params["eta"] = 0.05

min_child_weight_list = [1]
subsample_list = [1]
colsample_bytree_list = [0.6]
max_depth_list = [10]

#min_child_weight_list = [1, 5, 10]
#subsample_list = [0.6, 0.8, 1]
#colsample_bytree_list = [0.6, 0.8, 1]
#max_depth_list = [8, 10, 12]
params_list = []
for min_child_weight in min_child_weight_list:
    for subsample in subsample_list:
        for colsample_bytree in colsample_bytree_list:
Example #16
	def on_threadSample_newSample(self, sample):
		observability = sample[1]
		rest = sample[0]
		
		self.matplotlibWidget.figure.tight_layout()

		self.matplotlibWidget.axis.imshow(rest, vmin=0, vmax=255, cmap=plt.get_cmap('Greys_r'))
		self.matplotlibWidget.axis.imshow(observability, cmap=plt.get_cmap('RdYlGn'), alpha=0.2)
		
		#theta_coordinates = np.deg2rad([-146,0,45,90,0,180,170,190,200,0, 270, 315])
		theta_coordinates = np.deg2rad(np.arange(0,360,15))
	
		params = util.get_params(location="LaSilla")

		ff = params['ff']
		k1 = params['k1']
		k2 = params['k2']
		r0 = params['r0']
		cx = params['cx']
		cy = params['cy']
		north = params['north']
		deltatetha = params['deltatetha']
		url_weather = params['url_weather']
		wpl = params['wind_pointing_limit']
		wsl = params['wind_stopping_limit']
		
		coordinatesx = np.cos(north + theta_coordinates) * r0 + cx
		coordinatesy = np.sin(north + theta_coordinates) * r0 + cy
		
		northx, northy = util.get_image_coordinates(np.deg2rad(0), np.deg2rad(24))
		eastx, easty = util.get_image_coordinates(np.deg2rad(90), np.deg2rad(20))

		self.matplotlibWidget.axis.annotate('N', xy=(northx, northy), rotation=deltatetha,
		  horizontalalignment='center', verticalalignment='center')
		
		self.matplotlibWidget.axis.annotate('E', xy=(eastx, easty), rotation=deltatetha,
		  horizontalalignment='center', verticalalignment='center')

		altshow = [15, 30, 45, 60, 75, 90]
		for angle in np.deg2rad(altshow):
			rr = util.get_radius(angle, ff, k1, k2, r0)
		
			#if angle >= np.pi/2: print rr/330.
			self.matplotlibWidget.figure.gca().add_artist(plt.Circle((cx,cy),rr,color='k', fill=False))
		
			textx = np.cos(north + np.deg2rad(180)) * (rr - 2) + cx
			texty = np.sin(north + np.deg2rad(180)) * (rr - 2) + cy
			self.matplotlibWidget.axis.annotate('%d' % (90-np.ceil(np.rad2deg(angle))), xy=(textx, texty), rotation=deltatetha,#prefered_direction['dir'],
			  horizontalalignment='left', verticalalignment='center', size=10)
			
		WD, WS = get_wind(url_weather)
		WDd = WD
		WD = np.deg2rad(WD)

		if WS is not None and WS > wpl:
			wdcoordinatesx = np.cos(north - WD) * r0 + cx
			wdcoordinatesy = np.sin(north - WD) * r0 + cy
			Nd = np.rad2deg(north)# + 90.

			if WS > wsl :
				cw = 'r'
				self.matplotlibWidget.axis.add_patch(Wedge([cx, cy], r0, Nd - WDd, Nd - WDd+360, fill=False, hatch='//', edgecolor=cw))
				self.matplotlibWidget.axis.annotate('WIND LIMIT\nREACHED', xy=(cx, cy), rotation=0,
		  			horizontalalignment='center', verticalalignment='center', color=cw, fontsize=35)
			elif WS > wpl :
				cw = 'darkorange'
				wtcoordinatesx = np.cos(north - WD) * r0 / 2. + cx
				wtcoordinatesy = np.sin(north - WD) * r0 / 2. + cy

				self.matplotlibWidget.axis.add_patch(Wedge([cx, cy], r0, -90+Nd-WDd, 90+Nd-WDd, fill=False, hatch='//', edgecolor=cw))
				self.matplotlibWidget.axis.annotate('Pointing limit!', xy=(wtcoordinatesx, wtcoordinatesy), rotation=0,
		  			horizontalalignment='center', verticalalignment='center', color=cw, fontsize=25)
				
			self.matplotlibWidget.axis.plot([cx, wdcoordinatesx], [cy, wdcoordinatesy], lw=3, color=cw)

		
		#plt.plot([cx, northx], [cy, northy], lw=2, color='k')
		for ccx, ccy in zip(coordinatesx, coordinatesy):
			self.matplotlibWidget.axis.plot([cx, ccx], [cy, ccy], lw=1, color='k')
		self.matplotlibWidget.axis.set_ylim([np.shape(rest)[0], 0])
		self.matplotlibWidget.axis.set_xlim([0, np.shape(rest)[1]])
		
		self.matplotlibWidget.axis.set_axis_off()
		self.matplotlibWidget.canvas.draw()
Example #17
def mrmr(c, path):
    """
    Perform automatic mRMR feature selection using the specified cursor.
    Changes are persisted to the database using the cursor.
    """
    params = util.get_params(c, path)
    #
    # mRMR feature selection
    #
    include_dim = set()
    exclude_dim = set()
    all_dim = util.get_dimensions(c, 0)

    c.execute('SELECT COUNT(ED_ENC_NUM) FROM Documents')
    num_total_docs = int(c.fetchone()[0])
    c.execute('SELECT COUNT(ED_ENC_NUM) FROM Documents WHERE Score > 0')
    num_positive_docs = int(c.fetchone()[0])
    c.execute('SELECT COUNT(ED_ENC_NUM) FROM Documents WHERE Score < 0')
    num_negative_docs = int(c.fetchone()[0])

    #
    # The part below is ported from filterFeatures() of reference.py
    #
    cu = params['C_UPPERCUTOFF'] * num_total_docs
    ccp = params['C_CLASSCUTOFF'] * num_positive_docs
    ccm = params['C_CLASSCUTOFF'] * num_negative_docs
    lcp = params['C_LOWERCUTOFF'] * num_positive_docs
    lcm = params['C_LOWERCUTOFF'] * num_negative_docs

    #
    # The original script didn't have any comments, so here's my guess of what
    # individual variables represent.
    #
    # cu        Upper cut-off.  If a feature occurs in more than cu documents,
    #           then it should be excluded.
    # ccp       Upper class cut-off for positive documents.
    # lcp       Lower class cut-off for positive documents.
    #           If the frequency of a feature within positive documents
    #           falls within this interval, then it should be excluded.
    # ccm       Upper class cut-off for negative documents.
    # lcm       Lower class cut-off for negative documents.
    #           If the frequency of a feature within negative documents
    #           falls within this interval, then it should be excluded.
    #

    for (dim_id, _, _) in all_dim:
        text_count, plus_count, minus_count = 0, 0, 0
        c.execute(
            """SELECT Score
                FROM Documents INNER JOIN DocumentsToDimensions
                ON Documents.ED_ENC_NUM = DocumentsToDimensions.ED_ENC_NUM
                WHERE DimensionId = ?""", (dim_id, ))
        for (score,) in c:
            text_count += 1
            if score > 0:
                plus_count += 1
            elif score < 0:
                minus_count += 1

        if params['USE_UPPERCUTS'] and text_count > cu:
            exclude_dim.add(dim_id)
            if dim_id in include_dim:
                include_dim.remove(dim_id)
        elif params['USE_CLASSCUTS'] and minus_count > ccm and plus_count > ccp:
            exclude_dim.add(dim_id)
            if dim_id in include_dim:
                include_dim.remove(dim_id)
        elif params['USE_LOWERCUTS'] and minus_count < lcm and plus_count < lcp:
            exclude_dim.add(dim_id)
            if dim_id in include_dim:
                include_dim.remove(dim_id)
        else:
            if dim_id in exclude_dim:
                exclude_dim.remove(dim_id)
            include_dim.add(dim_id)
    #
    # end of ported code.
    #
    print('mRMR enabled:', len(include_dim), 'disabled:', len(exclude_dim))

    assert not include_dim.intersection(exclude_dim)
    for dim in include_dim:
        c.execute('UPDATE Dimensions SET Exclude = 0 WHERE DimensionId = ?',
                  (dim, ))
    for dim in exclude_dim:
        c.execute('UPDATE Dimensions SET Exclude = 1 WHERE DimensionId = ?',
                  (dim, ))
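To make the cut-off arithmetic concrete, an illustrative (entirely made-up) parameter setting and what the loop above would then do with it:

# With num_total_docs = 200, num_positive_docs = 80, num_negative_docs = 60 and
# C_UPPERCUTOFF = 0.5, C_CLASSCUTOFF = 0.25, C_LOWERCUTOFF = 0.05:
#   cu  = 0.5  * 200 = 100.0
#   ccp = 0.25 * 80  = 20.0    lcp = 0.05 * 80 = 4.0
#   ccm = 0.25 * 60  = 15.0    lcm = 0.05 * 60 = 3.0
# A dimension is then excluded if it occurs in more than 100 documents, or in
# more than 20 positive AND more than 15 negative documents, or in fewer than
# 4 positive AND fewer than 3 negative documents (each rule gated by its
# USE_UPPERCUTS / USE_CLASSCUTS / USE_LOWERCUTS switch); otherwise it is kept.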
Example #18
def run(dest, results_path, data_path, version, seed):
    np.random.seed(seed)
    hyps = [-1.0, 1.0]

    # load empirical probabilities
    human_responses = pd.read_csv(os.path.join(
        results_path, "human_fall_responses_raw.csv"))
    empirical = human_responses\
        .groupby(['version', 'block'])\
        .get_group((version, 'B'))\
        .rename(columns={'kappa0': 'kappa'})\
        .set_index(['stimulus', 'kappa', 'pid'])['fall? response']\
        .unstack('kappa')[hyps]\
        .stack()

    # load feedback
    fb = (util.load_fb(data_path)['C']['nfell'] > 1)\
        .unstack('kappa')[hyps]\
        .stack()\
        .to_frame('fb')\
        .reset_index()

    # load ipe probabilities
    old_store = pd.HDFStore(
        os.path.join(results_path, "model_fall_responses_raw.h5"), mode='r')

    # get the parameters we want
    sigma, phi = util.get_params()

    # dataframe to store all the results
    all_llh = pd.DataFrame([])

    # compute empirical likelihood
    print('empirical')
    llh_empirical = bootstrap_llh(compute_llh, empirical, fb)
    llh_empirical['counterfactual'] = False
    llh_empirical['likelihood'] = 'empirical'
    all_llh = all_llh.append(llh_empirical)

    print('empirical cf')
    llh_empirical_cf = bootstrap_llh(compute_llh_counterfactual, empirical, fb)
    llh_empirical_cf['counterfactual'] = True
    llh_empirical_cf['likelihood'] = 'empirical'
    all_llh = all_llh.append(llh_empirical_cf)

    # compute likelihoods for each query type
    for query in old_store.root._v_children:

        # look up the name of the key for the parameters that we want (will be
        # something like params_0)
        param_ref_key = "/{}/param_ref".format(query)
        params = old_store[param_ref_key]\
            .reset_index()\
            .set_index(['sigma', 'phi'])['index']\
            .ix[(sigma, phi)]

        # get the data
        key = "/{}/{}".format(query, params)
        ipe = old_store[key]\
            .groupby('block')\
            .get_group('B')\
            .rename(columns={'kappa0': 'kappa'})\
            .set_index(['stimulus', 'kappa', 'sample'])['response']\
            .unstack('kappa')[hyps]\
            .stack()

        # compute ipe likelihood
        print(query)
        llh_ipe = bootstrap_llh(compute_llh, ipe, fb)
        llh_ipe['counterfactual'] = False
        llh_ipe['likelihood'] = 'ipe_' + query
        all_llh = all_llh.append(llh_ipe)

        print(query + ' cf')
        llh_ipe_cf = bootstrap_llh(compute_llh_counterfactual, ipe, fb)
        llh_ipe_cf['counterfactual'] = True
        llh_ipe_cf['likelihood'] = 'ipe_' + query
        all_llh = all_llh.append(llh_ipe_cf)

    old_store.close()

    results = all_llh\
        .set_index(['likelihood', 'counterfactual', 'stimulus', 'kappa0', 'hypothesis'])\
        .sortlevel()

    assert not np.isnan(results['median']).any()
    assert not np.isinf(results['median']).any()

    results.to_csv(dest)
Example #19
def index(filename, nlp):
    """
    Perform indexing.  Each document is stemmed, and then the non-excluded
    dimensions are counted for that document.  The result is put into the
    DocumentsToDimensions table.
    """
    conn = sqlite3.connect(filename)
    c = conn.cursor()
    params = util.get_params(c, filename)
    stemmer = params['stemmer']
    print('index(): stemmer: %s' % stemmer)

    all_dim = util.get_dimensions(c, 0)
    assert all_dim, "You must calculate dimensions prior to indexing."

    all_include = util.get_all_include_regex(c)

    c.execute('SELECT COUNT(ED_ENC_NUM) FROM Documents')
    num_total_docs = int(c.fetchone()[0])

    c.execute('DELETE FROM DocumentsToDimensions')

    c.execute("SELECT COUNT(*) FROM Dimensions WHERE PartOfSpeech = 'bigram'")
    nBigrams = int(c.fetchone()[0])
    print('Number of bigrams: ', nBigrams)
    do_bigrams = nBigrams > 0

    c.execute("SELECT COUNT(*) FROM Dimensions WHERE PartOfSpeech = 'trigram'")
    nTrigrams = int(c.fetchone()[0])
    print('Number of trigrams: ', nTrigrams)
    do_trigrams = nTrigrams > 0

    #
    # If the POS column contains "unigram", then it means we didn't perform POS tagging when calculating dimensions.
    #
    c.execute("SELECT COUNT(*) FROM Dimensions WHERE PartOfSpeech = 'unigram'")
    pos_tag = int(c.fetchone()[0]) == 0

    cmd = 'SELECT ED_ENC_NUM FROM Documents'
    # if options.limit:
    #    cmd += ' LIMIT %d' % options.limit
    #    num_total_docs = min(options.limit, num_total_docs)

    #
    # TODO: why is fetchmany not working?
    #
    #document_ids = c.execute(cmd).fetchmany()
    document_ids = []
    for row in c.execute(cmd):
        document_ids.append(row[0])
    print("fetched %d document ids" % len(document_ids))

    #
    # Terminate the SQL connection so that the subprocesses can use it.
    #
    conn.commit()
    conn.close()

    #
    # https://docs.python.org/2/library/array.html#module-array
    #

    main_process(nlp, document_ids, filename, stemmer, all_include, pos_tag,
                 do_bigrams, do_trigrams, all_dim)

    conn = sqlite3.connect(filename)
    c = conn.cursor()
    for dim_id, _, _ in all_dim:
        c.execute(
            """SELECT COUNT(DimensionId)
                FROM DocumentsToDimensions
                WHERE DimensionId = ?""", (dim_id, ))
        freq = int(c.fetchone()[0])
        idf = log10(num_total_docs / (1 + freq))
        c.execute('UPDATE Dimensions SET IDF = ? WHERE DimensionId = ?',
                  (idf, dim_id))

    #
    # Save and exit.
    #
    conn.commit()
    c.close()
Example #20
def generate_features(track_ids, audio_paths, ground_truths, params, audio_params, param_file, logdir,
                      feature_path_root='features', normalize=False):

    if not os.path.exists(feature_path_root):
        print(f'Feature root directory does not yet exist. Creating {feature_path_root}.')
        os.makedirs(feature_path_root)

    # Go through each directory in feature root path and check if parameters are the same
    # If a match is found, load those features. If not, generate a new directory and features.
    feature_dirs = [os.path.join(feature_path_root, name) for name in os.listdir(feature_path_root)
                    if os.path.isdir(os.path.join(feature_path_root, name))]

    directory_found = False

    for feature_dir in feature_dirs:

        # Get parameter dictionary from directory
        param_path = f'{feature_dir}/params.json'
        if os.path.exists(param_path):
            param_stored, audio_param_stored, _ = get_params(param_path)
        else:
            print(f'Parameter file missing in {feature_dir}.')
            continue

        # Compare with current parameters
        if audio_param_stored == audio_params:

            directory_found = True
            print(f'Found matching feature directory: {feature_dir}.')

            # Calculate missing feature arrays
            for k, track_id in enumerate(tqdm(track_ids)):
                if not os.path.isfile(f'{feature_dir}/{track_id}.npy'):
                    print(f'[{k}/{len(track_ids)}] Calculating missing features for {track_id}.')

                    features, ground_truth, length = features_and_annotation(track_id, audio_paths, ground_truths, params)

                    np.save(f'{feature_dir}/{track_id}.npy', features)
                    np.save(f'{feature_dir}/{track_id}_truth.npy', ground_truth)
                    np.save(f'{feature_dir}/{track_id}_length.npy', length)

            break

    # If no directory with current parameters has been found, create it and store params
    if not directory_found:

        feature_dir = os.path.join(feature_path_root, f'features_{int(time.time())}')
        print(f'Creating new feature directory: {feature_dir}.')
        os.makedirs(feature_dir)

        # Copy parameters
        print('Writing parameter file.')
        copyfile(param_file, f'{feature_dir}/params.json')

        # Calculate missing feature arrays
        for k, track_id in enumerate(tqdm(track_ids)):
            if not os.path.isfile(f'{feature_dir}/{track_id}.npy'):
                print(f'[{k}/{len(track_ids)}] Calculating missing features for {track_id}.')

                features, ground_truth, length = features_and_annotation(track_id, audio_paths, ground_truths, params)

                np.save(f'{feature_dir}/{track_id}.npy', features)
                np.save(f'{feature_dir}/{track_id}_truth.npy', ground_truth)
                np.save(f'{feature_dir}/{track_id}_length.npy', length)


    # Find normalisation factors
    norm_file = f'{logdir}/norm.pkl'
    if normalize and not os.path.isfile(norm_file):
        print('Calculating normalisation factors.')
        count = 0
        sums = []
        mins = []
        maxs = []
        for k, track_id in enumerate(tqdm(track_ids)):
            feat = np.load(f'{feature_dir}/{track_id}.npy')

            sums.append(np.sum(feat, axis=1, keepdims=True))
            mins.append(np.min(feat, axis=1, keepdims=True))
            maxs.append(np.max(feat, axis=1, keepdims=True))

            count += feat.shape[1]

        mean = np.sum(np.concatenate(sums, axis=1), axis=1, keepdims=True) / count
        max_val = np.max(np.concatenate(maxs, axis=1), axis=1, keepdims=True) - mean
        min_val = np.min(np.concatenate(mins, axis=1), axis=1, keepdims=True) - mean

        # norm = np.max(np.concatenate([max_val, np.abs(min_val)], axis=1), axis=1, keepdims=True)

        # Calculate variance
        variances = []
        for k, track_id in enumerate(tqdm(track_ids)):
            feat = np.load(f'{feature_dir}/{track_id}.npy')

            variances.append(np.sum(np.square(feat - mean), axis=1, keepdims=True))

        var = np.sum(np.concatenate(variances, axis=1), axis=1, keepdims=True) / count
        # Normalize by standard deviation
        norm = np.sqrt(var)

        norm_dict = {'mean': mean,
                     'norm': norm,
                     'min_val': min_val,
                     'max_val': max_val}

        joblib.dump(norm_dict, norm_file)

    print('Features complete.')

    return feature_dir
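generate_features only writes norm.pkl; applying the stored factors at load time is not shown. A sketch of how they might be used, assuming per-row standardisation and using placeholder paths in place of the logdir / feature_dir / track_id values from the snippet:

import joblib
import numpy as np

norm_dict = joblib.load('logdir/norm.pkl')               # placeholder path
feat = np.load('features/features_0/track_id.npy')       # placeholder path
feat = (feat - norm_dict['mean']) / norm_dict['norm']     # zero mean, unit std per feature row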
Example #21
def calc_dim(path, limit=0, pos_tag=False, language='en'):
    """
    Stems each document.  Determines all possible dimensions.  Creates
    dimensions-related tables and populates them.

    path     The path to the SQLite database to work with.
    limit    The number of documents to process.  If zero, all documents.
    pos_tag  Perform part-of-speech tagging.
    language The language of the documents (default: 'en').
    """
    conn = sqlite3.connect(path)
    c = conn.cursor()
    params = util.get_params(c, path)

    stemmer = params['stemmer']
    bigrams = params['bigrams']
    trigrams = params['trigrams']
    print 'calc_dim(): stemmer: %s bigrams: %s trigrams: %s' % (
        stemmer, bigrams, trigrams)

    exclude = util.get_all_exclude_regex(c)
    include = util.get_all_include_regex(c)

    stemmers = {
        'porter': nltk.PorterStemmer(),
        'lancaster': nltk.LancasterStemmer()
    }
    try:
        stemmer = stemmers[stemmer]
    except KeyError:
        print 'unsupported stemmer:', stemmer
        return 1

    num_doc = 0
    c.execute('SELECT ED_ENC_NUM FROM Documents')
    for doc in c:
        num_doc += 1
    cmd = 'SELECT ED_ENC_NUM, NOTE_TEXT, Score FROM Documents'
    if limit:
        cmd += ' LIMIT %d' % limit
        num_doc = min(limit, num_doc)
    c.execute(cmd)

    all_words = set()
    all_bigrams = set()
    all_trigrams = set()
    all_inclusions = set()

    all_doc = []
    for i, (num, raw, score) in enumerate(c):
        if i % 100 == 0:
            print 'calc_dim(): processing document %s (%d/%d)' % (
                str(num), i + 1, num_doc)
        proc = process_document(raw, stemmer, include, pos_tag, bigrams,
                                trigrams)
        all_words = all_words.union(set(proc['stemmed']))

        all_inclusions = all_inclusions.union(set(proc['inclusions']))

        if bigrams:
            all_bigrams = all_bigrams.union(set(proc['bigrams']))
        if trigrams:
            all_trigrams = all_trigrams.union(set(proc['trigrams']))

        all_doc.append(num)

    all_words = list(all_words)
    all_words.sort()

    all_bigrams = list(all_bigrams)
    all_bigrams.sort()

    all_trigrams = list(all_trigrams)
    all_trigrams.sort()

    all_inclusions = list(all_inclusions)
    all_inclusions.sort()

    init_dim(c)
    populate_dim(c, all_words, all_bigrams, all_trigrams, all_inclusions,
                 exclude)

    c.execute("SELECT COUNT(*) FROM Dimensions")
    nDims = int(c.fetchone()[0])
    #
    # Save and exit.
    #
    c.close()
    conn.commit()
Example #22
    def run_step(self, prev, params):
        df = prev

        custom_params, url = get_params(params.get('url')), params.get('url')

        names = {
            'municipality_code': 'mun_id',
            'state_code': 'ent_id',
            'foreign_destination_origin': 'partner_country',
            'trade_flow': 'flow_id',
            'product_2d': 'hs2_id',
            'product_4d': 'hs4_id',
            'product': 'hs6_id'
        }
        df.rename(columns=names, inplace=True)

        if 'unanonymized' in params.get('table'):
            logging.debug('Unanonymized values...')
            df.drop(columns=['value'], inplace=True)
            df.rename(columns={'unanonymized_value': 'value'}, inplace=True)
        else:
            logging.debug('Anonymized values...')
            df.drop(columns=['unanonymized_value'], inplace=True)

        # drop censored ('C') and non-positive values
        df.value.replace('C', np.nan, inplace=True)
        df.value = df.value.astype('float')
        df = df.loc[df.value > 0].copy()

        # iso3 names
        df['partner_country'] = df['partner_country'].str.lower()

        # fill columns
        level = ['hs6_id', 'hs4_id', 'hs2_id']
        for i in level:
            if i != custom_params['depth']:
                df[i] = 0

        # drop date, create time dimension
        for k, v in custom_params['datetime'].items():
            df[k] = v
        df.drop(columns='date', inplace=True)

        # hs codes
        df[custom_params['depth']] = df[custom_params['depth']].astype(
            'str').str.zfill(get_number(custom_params['depth']))
        for row in df[custom_params['depth']].unique():
            df[custom_params['depth']].replace(row,
                                               hs6_converter(row),
                                               inplace=True)

        for col in df.columns[df.columns != 'partner_country']:
            df[col] = df[col].astype('float').round(0).astype('int')

        # drop null trade values
        df.dropna(subset=['value'], inplace=True)

        # national ent id
        if 'National' in url:
            df['ent_id'] = 0

        # explicit level name
        df['level'] = int(custom_params['level'][2])
        df['product_level'] = int(
            re.findall(r"(\d){1}", custom_params['depth'])[0])

        # debug
        df['url'] = url

        return df
Example #23
    def run_step(self, prev, params):
        df = prev

        custom_params, url = get_params(params.get('url')), params.get('url')

        names = {
            'municipality_code': 'mun_id',
            'state_code': 'ent_id',
            'foreign_destination_origin': 'partner_country',
            'trade_flow': 'flow_id',
            'product_2d': 'hs2_id',
            'product_4d': 'hs4_id',
            'product': 'hs6_id'
        }
        df.rename(columns=names, inplace=True)

        if 'unanonymized' in params.get('table'):
            logging.debug('Unanonymized values...')
            df.drop(columns=['value'], inplace=True)
            df.rename(columns={'unanonymized_value': 'value'}, inplace=True)
        else:
            logging.debug('Anonymized values...')
            df.drop(columns=['unanonymized_value'], inplace=True)

        # drop censored ('C') and non-positive values
        df.value.replace('C', np.nan, inplace=True)
        df.value = df.value.astype('float')
        df = df.loc[df.value > 0].copy()

        # iso3 names
        df['partner_country'] = df['partner_country'].str.lower()

        # fill columns
        level = ['hs6_id', 'hs4_id', 'hs2_id']
        for i in level:
            if i != custom_params['depth']:
                df[i] = 0

        # drop date, create time dimension
        for k, v in custom_params['datetime'].items():
            df[k] = v
        df.drop(columns='date', inplace=True)

        # hs codes
        df[custom_params['depth']] = df[custom_params['depth']].astype(
            'str').str.zfill(get_number(custom_params['depth']))
        for row in df[custom_params['depth']].unique():
            df[custom_params['depth']].replace(row,
                                               hs6_converter(row),
                                               inplace=True)

        for col in df.columns[df.columns != 'partner_country']:
            df[col] = df[col].astype('float').round(0).astype('int')

        # drop null trade values
        df.dropna(subset=['value'], inplace=True)

        # national ent id
        if 'National' in url:
            df['nat_id'] = "mex"

        # explicit level name
        df['product_level'] = int(
            re.findall(r"(\d){1}", custom_params['depth'])[0])

        # debug
        df['url'] = url

        for id_ in ["ent_id", "mun_id"]:
            if id_ in df.columns:
                df.loc[df[id_] == 0, id_] = 33000 if id_ == 'mun_id' else 33

        df.loc[df.partner_country == "zya", "partner_country"] = "nld"
        df.loc[df.partner_country == "rom", "partner_country"] = "rou"
        df.loc[df.partner_country == "cia", "partner_country"] = "vat"
        df.loc[df.partner_country == "cur", "partner_country"] = "cuw"
        df.loc[df.partner_country == "cxi", "partner_country"] = "cxr"
        df.loc[df.partner_country == "dsm", "partner_country"] = "fsm"
        df.loc[df.partner_country == "fxa", "partner_country"] = "atf"
        df.loc[df.partner_country == "lhm", "partner_country"] = "hmd"
        df.loc[df.partner_country == "pty", "partner_country"] = "pcz"
        df.loc[df.partner_country == "xch", "partner_country"] = "iot"

        # Removing firms variable (requested)
        df.drop("firms", axis=1, inplace=True)

        return df
Example #24
def mrmr(path, temp_dir):
    conn = sqlite3.connect(path)
    c = conn.cursor()

    c.execute('SELECT DimensionId FROM Dimensions')
    dimension_ids = [ d[0] for d in c.fetchall() ]

    mrmr_tmp = P.join(temp_dir, "mrmr-in.csv")
    fout = open(mrmr_tmp, 'w')
    fout.write(','.join([ 'Class' ] + map(str, dimension_ids)) + '\n')
    
    #
    # Output the CSV file for the mRMR utility.
    #
    c.execute('SELECT ED_ENC_NUM, Score FROM Documents')
    num_doc = 0
    for doc_id, score in c:
        #
        # Feature selection can only take place when we have labelled samples.
        #
        assert score in (-100, 100)
        c_inner = conn.cursor()
        nonzero = {}
        #
        # TODO: ignore disabled dimensions?
        #
        c_inner.execute("""SELECT DimensionId, Count
                FROM DocumentsToDimensions where ED_ENC_NUM = ?""",
                (doc_id,))
        for dim_id, count in c_inner:
            nonzero[dim_id] = count
        values = [ str(score/100) ]
        for dim in dimension_ids:
            if dim in nonzero:
                values.append(nonzero[dim])
            else:
                values.append(0)
        fout.write(','.join(map(str, values)) + '\n')
        num_doc += 1

    fout.close()

    #
    # Run the mRMR utility.
    #
    params = util.get_params(c, path)

    cmd = [ MRMR, '-i', mrmr_tmp, '-s', str(num_doc),
            '-v', str(len(dimension_ids)) ] + params['MRMR'].split(' ')
    print 'command line:', ' '.join(cmd)
    p = sub.Popen(cmd, bufsize=1, stdout=sub.PIPE, stderr=sub.STDOUT)

    #
    # This blocks until the underlying process completes, so can appear 
    # unresponsive.  Don't do this.
    #
    # stdout, stderr = p.communicate()

    #
    # Parse the output, enable/disable the required features.
    # There's a warning about buffers filling up and blocking the process if
    # things are done this way 
    # (http://docs.python.org/library/subprocess.html#subprocess.Popen.kill).
    # However, in our case, there isn't THAT much data to worry about -- it's
    # more important to output it as it's coming in so it looks like the
    # application is actually doing something.
    #
    result = {}
    regex = re.compile('(\\d+) \t (\\d+) \t (\\d+) \t (\\d+\\.\\d+)')

    #
    # Argh, stdout is still being buffered...
    # TODO: try https://bitbucket.org/geertj/winpexpect/wiki/Home
    #
    while True:
        line = p.stdout.readline()
        if not line:
            break
        print line,
        match = regex.search(line)
        if not match:
            continue
        order, fea, name, score = match.groups()
        result[int(order)] = (int(fea), int(name), float(score))
    print

    selected = sorted([ (result[k][1], result[k][2]) for k in result ])
    c.execute('UPDATE Dimensions SET Exclude = 1')
    for dim in selected:
        c.execute(
                'UPDATE Dimensions SET Exclude = 0, MRMR = ? WHERE DimensionId = ?', 
                (dim[1], dim[0]))
    
    conn.commit()
    output_dim_table(c)
    c.close()
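One way to tackle the child-side buffering complained about in the comments above, on POSIX systems, is to wrap the mRMR command in coreutils' stdbuf so its output is flushed line by line; this is only a sketch against the cmd list built above, not part of the original script, and it does not help on Windows (where the winpexpect suggestion in the comment would apply):

# Sketch: force line-buffered output from the child process (POSIX only).
cmd = ['stdbuf', '-oL'] + cmd
p = sub.Popen(cmd, bufsize=1, stdout=sub.PIPE, stderr=sub.STDOUT)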
Example #25
import numpy as np
import pylab as plt
import glob
import scipy.ndimage
import util

theta_coordinates = np.deg2rad(
    [-146, 0, 45, 90, 0, 180, 170, 190, 200, 0, 270,
     315])  #np.arange(0, 360, 45))
theta_coordinates = np.deg2rad([0, 45, 90, 135, 180, 225, 270, 315])
print theta_coordinates

params = util.get_params(location="LaSilla")

ff = params['ff']
k1 = params['k1']
k2 = params['k2']
r0 = params['r0']
cx = params['cx']
cy = params['cy']
north = params['north']
deltatetha = params['deltatetha']

coordinatesx = np.cos(north + theta_coordinates) * r0 + cx
coordinatesy = np.sin(north + theta_coordinates) * r0 + cy

list_of_image = glob.glob("current*.JPG")
for fnimg in list_of_image:

    im = scipy.ndimage.imread(fnimg)
    ar = np.array(im)
Example #26
import numpy as np
import pylab as plt
import glob
import scipy.ndimage
import util


theta_coordinates = np.deg2rad([-146,0,45,90,0,180,170,190,200,0, 270, 315])#np.arange(0, 360, 45))
theta_coordinates = np.deg2rad([0, 45, 90, 135, 180, 225, 270, 315])
print theta_coordinates


params = util.get_params(location="LaSilla")

ff = params['ff']
k1 = params['k1']
k2 = params['k2']
r0 = params['r0']
cx = params['cx']
cy = params['cy']
north = params['north']
deltatetha = params['deltatetha']

coordinatesx = np.cos(north + theta_coordinates) * r0 + cx
coordinatesy = np.sin(north + theta_coordinates) * r0 + cy

list_of_image = glob.glob("current*.JPG")
for fnimg in list_of_image:
	
	im = scipy.ndimage.imread(fnimg)
	ar = np.array(im)
Example #27
from util import get_params, trade, calculate_lt_returns

#get parameters
param = get_params()
sim_results = []
dist_results = []
day_totals = []
sim_counter = 0

for i in range(param.n_sims):
    sim_counter += 1
    start_value = param.portfolio
    lt_alloc = param.lt_alloc
    st_alloc = param.st_alloc
    cash = param.cash_alloc
    lt_holding_pct = param.lt_holding_pct
    lt_holding_amt = 1 / lt_holding_pct
    rebalance = param.rebalance_period
    lt_holdings = []
    lt_returns = 0
    end_sim = False

    #ensure allocations equal 100% of portfolio
    if lt_alloc + st_alloc + cash != 1:
        print('ERROR: Allocations must equal 100%')
        exit()

    # print initial parameters
    print('\n' + '-' * 61)
    print('Initial Simulation Parameters:')
    print('Starting portfolio value: ${:.2f}'.format(start_value))