Code example #1
class Stats(object):
    """
    This class is responsible for creating and updating a database entry (a document in the Elasticsearch DB).
    There are two usage options:
    1. without arguments - as a base class of TestStatsMixin - for saving test statistics
    2. with arguments - as a separate object to update an existing document
    """
    def __init__(self, *args, **kwargs):
        self._test_index = kwargs.get('test_index', None)
        self._test_id = kwargs.get('test_id', None)
        self._es_doc_type = "test_stats"
        self.es = ES()
        self._stats = {}
        if not self._test_id:
            super(Stats, self).__init__(*args, **kwargs)

    def create(self):
        self.es.create_doc(index=self._test_index,
                           doc_type=self._es_doc_type,
                           doc_id=self._test_id,
                           body=self._stats)

    def update(self, data):
        """
        Update document
        :param data: data dictionary
        """
        try:
            self.es.update_doc(index=self._test_index,
                               doc_type=self._es_doc_type,
                               doc_id=self._test_id,
                               body=data)
        except Exception as ex:
            logger.error('Failed to update test stats: test_id: %s, error: %s',
                         self._test_id, ex)
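A minimal usage sketch for the two modes described in the docstring above; the index name, test id, and field values below are hypothetical placeholders, not values from the original project.

# Mode 2: standalone object pointing at an existing Elasticsearch document
stats = Stats(test_index='performance_tests', test_id='abc123')  # hypothetical values
stats.update({'results': {'op_rate': 1000}})

# Mode 1: no arguments, used as a base class (e.g. by a TestStatsMixin-style class)
class MyTestStats(Stats):
    pass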
Code example #2
def removeArticleTag(request):
    try:
        article_id = request.GET['article_id']
        label = request.GET['label']
        tag = request.GET['tag']
        #label = labelMap[label] ####
        print "removeArticleTag"
        articleDAO = ArticleDAO('articles_testN')
        flag, update = articleDAO.removeTag(article_id, tag)

        url = "http://localhost:9200/news_spider_db/articles_testN/" + str(
            article_id) + "/_update"
        print url
        es = ES()
        doc = {"doc": update}
        # print doc
        # update
        result = es.post(url, doc)
        #print "remove article result:",result
        message = "success"
        return HttpResponse(json.dumps(message),
                            content_type="application/json")
    except BaseException, e:
        logging.error(e)
        print e
        print traceback.print_exc()
        return HttpResponse(json.dumps("failed"),
                            content_type="application/json")
Code example #3
 def __init__(self, sc, data_path, tmdb_key):
     self.sc = sc
     self.sqlContext = SQLContext(self.sc)
     self.data_path = data_path
     self.es = ES(self.sc, self.sqlContext)
     tmdb.API_KEY = tmdb_key
     self.tmdb_key = tmdb_key
Code example #4
def es(game, render, config, generations, sigma, seed, random_noise_size, classic_es,
       activation, gain, optimize, mutate, num_parents, no_videos, big_net, novelty):
    timestamp = datetime.datetime.now()
    optimize = opt_modes[optimize]
    activation = act_modes[activation]

    if config == "default":
        config = "configurations/default_atari_config.json"
        with open(config, 'r') as f:
            config = json.loads(f.read())
        game = game[0].capitalize() + game[1:]
        i = short_names.index(game)
        config['env_id'] = envs[i]
        config['env_short'] = short_names[i]
    else:
        with open(config, 'r') as f:
            config = json.loads(f.read())

    path = "save/{}-{}_{}".format(config["env_short"], str(timestamp.date()), str(timestamp.time()))
    txt = "Log {}.log\n\nWith parameters: \ngame={} ({}) \nconfig={} \ngenerations={} \nsigma={} \nseed={}\nrandom_noise_size={} \
           \nclassic_es={} \n(xavier) gain={} \nactivation={} \noptimize={}\nmutate={} parameters\nnum_parents={} workers\nbig_net={}\nnovelty={}\n\
           ".format(path, config['env_short'], config['env_id'], config, generations, sigma, seed, random_noise_size, classic_es,
                    gain, activation, optimize, "all" if mutate == 1 else "1/{} of".format(mutate),
                    "all" if num_parents == 1 else "1/{} of".format(num_parents), big_net, novelty)

    worker = ES(config, rand_num_table_size=random_noise_size, sigma=sigma, seed=seed, render=render,
                verbose=True, log_path=path, initial_text=txt, classic_es=classic_es, gain=gain,
                activation=activation, optimize=optimize, mutate=mutate, no_videos=no_videos,
                big_net=big_net, novelty=novelty, num_parents=num_parents)
    worker(generations)
    worker.save(path + '.es')
Code example #5
 def __init__(self, *args, **kwargs):
     self._test_index = kwargs.get('test_index', None)
     self._test_id = kwargs.get('test_id', None)
     self._es_doc_type = "test_stats"
     self.es = ES()
     self._stats = {}
     if not self._test_id:
         super(Stats, self).__init__(*args, **kwargs)
Code example #6
def test_fetch_domain(monkeypatch):
    es = ES('ip', 'domain')

    monkeypatch.setattr(es.es, 'describe_elasticsearch_domain',
                        fake_es_describe)

    config = es.fetch_config()
    assert isinstance(config, dict)
Code example #7
def test_extract_access_list(monkeypatch):
    es = ES('192.168.0.2/32', 'domain')
    data = {
        "DomainStatus": {
            "AccessPolicies":
            '{"Statement":[{"Condition":{"IpAddress":{"aws:SourceIp":["192.168.0.1/32"]}}}]}'
        }
    }
    config, ips = es.append_ip_acl(data)
    assert ips == ["192.168.0.1/32", "192.168.0.2/32"]
    assert isinstance(config, dict)
Code example #8
def handler(event, context):
    '''
    :param event: SNS-triggered Lambda event; the SNS message carries the feed name and ES endpoint
    :param context: Lambda context object (unused)
    :return: None
    '''
    event = json.loads(event['Records'][0]['Sns']['Message'])
    feed = event['feed']
    iocfp = IOCFeedParser(event)
    elasticsearch = ES(es_host=event['es_endpoint'])
    feed_data = iocfp.get_feed(feed)
    parsed_feed = iocfp.proccess_feed_data(feed_data, feed)
    iocfp.save_to_s3(feed_data, feed)
    elasticsearch.bulk_es_index_dataframe(es_index='iocs', df=parsed_feed)
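Judging from the keys the handler reads, a hand-rolled test event might look like the sketch below; the feed name and ES endpoint are hypothetical placeholders.

import json

# Hypothetical SNS-wrapped test event; only the keys the handler reads are included.
test_event = {
    "Records": [{
        "Sns": {
            "Message": json.dumps({
                "feed": "example-ioc-feed",               # hypothetical feed name
                "es_endpoint": "search-iocs.example.com"  # hypothetical ES endpoint
            })
        }
    }]
}
handler(test_event, context=None)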
Code example #9
def changeLabel(request):
    try:
        article_id = request.GET['article_id'].strip()
        label = request.GET['label']
        reverseMap = {"0": "1", "1": "0"}
        rLabel = reverseMap[label]
        logging.info("[changeLabel] article_id=" +article_id +" label=" \
            + label + "  rLabel=" + rLabel)

        #label = labelMap[label]
        #reverseLabel = labelMap[reverseLabel]
        articleDAO = ArticleDAO('articles_testN')
        article = articleDAO.show_article(article_id)
        article.pop("_id")
        article.pop("id")
        article['article_label'] = int(rLabel)
        user = request.session.get('user', default=None)

        if user['role'] == "0":
            article['article_label_state'] = 2
            article['update_admin'] = user['username']
        elif user['role'] == "1":
            article['article_label_state'] = 1
            article['update_student'] = user['username']

        #article['article_label_state'] = 0
        result = articleDAO.update_article(article_id, article)

        url = "http://localhost:9200/news_spider_db/articles_testN/" + str(
            article_id) + "/_update"
        es = ES()
        doc = {"doc": {"article_label": int(rLabel)}}
        # update
        es.post(url, doc)
        logging.info("[changeLabel] result=" + str(result))

        if result:
            return HttpResponse(
                json.dumps('{"label":"' + str(rLabel) + '","article_id":"' +
                           str(article_id) + '"}'),
                content_type="application/json")
        else:
            return HttpResponse(json.dumps('{"label":"failed"}'),
                                content_type="application/json")
    except BaseException, e:
        logging.error(e)
        return HttpResponse(json.dumps('{"label":"failed"}'),
                            content_type="application/json")
Code example #10
File: agent.py Project: dingidng/PARL_code_example
    def __init__(self, action_space):
        """Initialize a new agent."""

        self.action_space = action_space

        self.actions = []
        actions_vec = np.load("./saved_files/top1000_actions.npz")["actions"]
        for i in range(actions_vec.shape[0]):
            act = action_space.from_vect(actions_vec[i])
            self.actions.append(act)

        self.actions = self.actions[:1000]
        self.act_num = len(self.actions)
        self.sub_ids = np.load('./saved_files/sub_id_info.npz')['sub_ids']
        self.do_nothing_action = action_space({})
        self.origin_ids = range(len(self.actions))

        offset = action_space.n_line
        self.action_to_sub_topo = {}
        for sub_id, sub_elem_num in enumerate(action_space.sub_info):
            self.action_to_sub_topo[sub_id] = (offset, offset + sub_elem_num)
            offset += sub_elem_num
        self.step = 0

        model = PowerNetModel()
        algorithm = ES(model)
        self.es_agent = ESAgent(algorithm)
        self.es_agent.restore(save_path='./saved_files', filename='model.ckpt')

        self.to_print_data = []

        self.last_disconnect_step = -100
        self.last_diconnect_line = None
        self.simulation_times = 0
Code example #11
    def __init__(self, config):
        self.config = config

        env = gym.make(self.config['env_name'])
        self.config['obs_dim'] = env.observation_space.shape[0]
        self.config['act_dim'] = env.action_space.shape[0]

        self.obs_filter = MeanStdFilter(self.config['obs_dim'])
        self.noise = SharedNoiseTable(self.config['noise_size'])

        model = MujocoModel(self.config['act_dim'])
        algorithm = ES(model)
        self.agent = MujocoAgent(algorithm, self.config)

        self.latest_flat_weights = self.agent.get_flat_weights()
        self.latest_obs_filter = self.obs_filter.as_serializable()

        self.sample_total_episodes = 0
        self.sample_total_steps = 0

        self.actors_signal_input_queues = []
        self.actors_output_queues = []

        self.create_actors()

        self.eval_rewards_stat = WindowStat(self.config['report_window_size'])
        self.eval_lengths_stat = WindowStat(self.config['report_window_size'])
Code example #12
 def __init__(self,
              es_index,
              es_doc_type,
              send_email=False,
              email_recipients=(),
              email_template_fp="",
              query_limit=1000,
              logger=None):
     self._es = ES()
     self._conf = self._es._conf
     self._es_index = es_index
     self._es_doc_type = es_doc_type
     self._limit = query_limit
     self._send_email = send_email
     self._email_recipients = email_recipients
     self._email_template_fp = email_template_fp
     self.log = logger if logger else log
Code example #13
def approval(request):
    ## review / approve
    try:
        article_id = request.GET['article_id'].strip()
        label = request.GET['label']
        label = labelMap[label]
        articleDAO = ArticleDAO('articles_testN')
        article = articleDAO.show_article(article_id)
        article.pop("id")
        article.pop("_id")
        user = request.session.get('user', default=None)
        if user['role'] == "0":
            article['article_label_state'] = 2
            article['update_admin'] = user['username']
        elif user['role'] == "1":
            article['article_label_state'] = 1
            article['update_student'] = user['username']

        #article['article_label_state'] = 0
        result = articleDAO.update_article(article_id, article)

        url = "http://localhost:9200/news_spider_db/articles_testN/" + str(
            article_id) + "/_update"
        es = ES()
        doc = {
            "doc": {
                "article_label_state": article['article_label_state'],
                "update_admin": article['update_admin'],
                "update_student": article['update_student']
            }
        }
        # update
        es.post(url, doc)

        if result:
            return HttpResponse(json.dumps("success"),
                                content_type="application/json")
        else:
            return HttpResponse(json.dumps("failed"),
                                content_type="application/json")
    except BaseException, e:
        logging.error(e)
        return HttpResponse(json.dumps("failed"),
                            content_type="application/json")
Code example #14
File: test_es.py Project: dmathieu/datagouvfr
def test_add_create_index_with_mapping():
    es = ES(None, 'datagouvfr_test-index-mapping', mapping)
    assert not elastic.indices.exists(index = 'datagouvfr_test-index-mapping-2020-01-01')
    es.add({"day": "2020-01-01"})
    es.commit()
    assert elastic.indices.exists(index = 'datagouvfr_test-index-mapping-2020-01-01')
    assert mapping['mappings'] == elastic.indices.get(index = 'datagouvfr_test-index-mapping-2020-01-01')['datagouvfr_test-index-mapping-2020-01-01']['mappings']
Code example #15
    def __init__(self, config):
        self.config = config

        self.env = gym.make(self.config['env_name'])
        self.config['obs_dim'] = self.env.observation_space.shape[0]
        self.config['act_dim'] = self.env.action_space.shape[0]

        self.obs_filter = MeanStdFilter(self.config['obs_dim'])
        self.noise = SharedNoiseTable(self.config['noise_size'])

        model = MujocoModel(self.config['act_dim'])
        algorithm = ES(model)
        self.agent = MujocoAgent(algorithm, self.config)
Code example #16
def get_results(results_path, update_db):
    bad_chars = " "
    os.chdir(os.path.join(results_path, "perf_fast_forward_output"))
    db = ES()
    results = {}
    for dirname in os.listdir(os.getcwd()):
        logger.info(dirname)
        for filename in os.listdir(dirname):
            new_filename = "".join(c for c in filename if c not in bad_chars)
            test_type = dirname + "_" + os.path.splitext(new_filename)[0]
            json_path = os.path.join(dirname, filename)
            with open(json_path, 'r') as f:
                logger.info("Reading: %s", json_path)
                datastore = json.load(f)
            datastore.update({'hostname': HOSTNAME})
            if update_db:
                db.create(index=ES_INDEX,
                          doc_type=test_type,
                          doc_id=TEST_ID,
                          body=datastore)
            results[test_type] = datastore
    return results
Code example #17
def search():
    # accepts json payload with "query" key and "files_and_folders"
    # query is mandatory, files_and_folders default value is 1
    es = ES()
    content = request.get_data()
    post_input = json.loads(content)

    try:
        query = post_input['query']
    except KeyError:
        return 'No search query!'

    try:
        files_and_folders = int(post_input['files_and_folders'])
    except (KeyError, ValueError):
        files_and_folders = 1

    if files_and_folders:
        output = es.query_all(query)
    else:
        output = es.query_files(query)
    return output
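Based on the comment at the top of the view, a client request might look like the following sketch; the route path and port are assumptions, since the snippet does not show the Flask route decorator.

import json
import requests  # assuming the requests library is available on the client side

# Hypothetical URL; the actual route and port are not shown in the snippet above.
payload = {"query": "annual report", "files_and_folders": 0}  # 0: search files only
resp = requests.post("http://localhost:5000/search", data=json.dumps(payload))
print(resp.text)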
Code example #18
class StdOutListener(StreamListener):
	counter = 0
	total_docs_to_be_indexed = 1000

	def __init__(self,*args,**kwargs):
		super(StdOutListener, self).__init__(*args, **kwargs)
		self.es = ES().getES()

	def on_data(self,data):
		print data
		while self.total_docs_to_be_indexed > self.counter:
			tweet = json.loads(data)
			self.index_tweet(tweet)
			self.counter += 1
			return True
		return False

	def index_tweet(self,tweet):
		self.es.index(index = 'twitter', doc_type = 'tweets', id = tweet['id_str'], body = tweet)

	def on_error(self,status):
		print "the status is: " + str(status)
		pass
Code example #19
def addArticleTag(request):
    try:
        article_id = request.GET['article_id']
        label = request.GET['label']
        tag = request.GET['tag']
        #label = labelMap[label] ####
        articleDAO = ArticleDAO('articles_testN')
        flag, update = articleDAO.addTag(article_id, tag)

        message = "success"
        url = "http://localhost:9200/news_spider_db/articles_testN/" + str(
            article_id) + "/_update"
        es = ES()
        doc = {"doc": update}
        # update
        result = es.post(url, doc)
        print result
        return HttpResponse(json.dumps(message),
                            content_type="application/json")
    except BaseException, e:
        logging.error(e)
        return HttpResponse(json.dumps("failed"),
                            content_type="application/json")
Code example #20
File: test_es.py Project: dmathieu/datagouvfr
def test_es_add():
    es = ES(None, 'datagouvfr_test-add', mapping)
    assert not elastic.indices.exists(index = 'datagouvfr_test-add-2020-01-01')
    es.add({"hello": "world", "day": "2020-01-01"})
    es.commit()
    assert elastic.indices.exists(index = 'datagouvfr_test-add-2020-01-01')
    time.sleep(1)
    data = elastic.search(index = 'datagouvfr_test-add-2020-01-01')
    assert len(data['hits']['hits']) == 1
Code example #21
	def __init__(self,*args,**kwargs):
		super(StdOutListener, self).__init__(*args, **kwargs)
		self.es = ES().getES()
Code example #22
                              parsimony_pressure_w1=parsimonyW1,
                              parsimony_pressure_w2=parsimonyW2,
                              tourn_size=tournsize,
                              min_height=min_height,
                              max_height=max_height,
                              mut_min_height=mut_min_height,
                              mut_max_height=mut_max_height)

last_pop, log = gpsolver.solve(n_generations=n_generations)
best_fitness = log.chapters['fitness'].select('min')
avg_size = log.chapters['size'].select('avg')
np.savetxt('best_fits.txt', best_fitness)
np.savetxt('avg_size', avg_size)

######## saving best solution found #########
best = max(last_pop, key=operator.attrgetter("fitness"))
logging.info('best individual\'s fitness: ' + str(best.fitness))
draw(best, 'out/best_ind.pdf')

######### using evolution strategy ##########


def my_cost(constants):
    return gpsolver.simpleFitness(best, constants)[0]


logging.info('current constants ' + str(constants))
mes = ES(len(constants), my_cost)
mes.set_ans(constants)
mes.evolve(iterations=1000)
logging.info('best found constants ' + str(mes.ans))
Code example #23
def run_asebo(params):
        
    env = gym.make(params['env_name'])
    params['ob_dim'] = env.observation_space.shape[0]
    params['ac_dim'] = env.action_space.shape[0]
    
    m = 0
    v = 0

    params['k'] += -1
    params['alpha'] = 1
        
    params['zeros'] = False
    master = get_policy(params)
    
    if params['log']:
        params['num_sensings'] = 4 + int(3 * np.log(master.N))
    
    if params['k'] > master.N:
        params['k'] = master.N
        
    n_eps = 0
    n_iter = 1
    ts_cumulative = 0
    ts = []
    rollouts = []
    rewards = []
    samples = []
    alphas = []
    G = []
        
    while n_iter < params['max_iter']:
            
        params['n_iter'] = n_iter
        gradient, n_samples, timesteps = ES(params, master, G)
        ts_cumulative += timesteps
        ts.append(ts_cumulative)
        alphas.append(params['alpha'])

        if n_iter == 1:
            G = np.array(gradient)
        else:
            G *= params['decay']
            G = np.vstack([G, gradient])
        n_eps += 2 * n_samples
        rollouts.append(n_eps)
        gradient /= (np.linalg.norm(gradient) / master.N + 1e-8)
            
        update, m, v = Adam(gradient, m, v, params['learning_rate'], n_iter)
            
        master.update(update)
        test_policy = worker(params, master, np.zeros([1, master.N]), 0)
        reward = test_policy.rollout(train=False)
        rewards.append(reward)
        samples.append(n_samples)
            
        print('Iteration: %s, Rollouts: %s, Reward: %s, Alpha: %s, Samples: %s' %(n_iter, n_eps, reward, params['alpha'], n_samples))
        n_iter += 1
        
        out = pd.DataFrame({'Rollouts': rollouts, 'Reward': rewards, 'Samples': samples, 'Timesteps': ts, 'Alpha': alphas})
        out.to_csv('Seed%s.csv' %(params['seed']), index=False)        
Code example #24
    if "Lat" in record and "Long_" in record and len(record["Lat"]) > 0 and len(record["Long_"]) > 0:
        new["location_point"] = f"{record['Lat']},{record['Long_']}"

    for source, destination in MAP_FIELDS.items():
        if source in record:
            if (len(record[source]) > 0):
                new[destination] = record[source]

    return new


def read_csv(file):
    file_base = os.path.basename(file)
    print(f"reading file '{file}'")
    if match := re.match(r'^(\d\d?)-(\d\d?)-(\d{4})', file_base):
        index_fragment = f"{match.group(3)}-{match.group(1)}-{match.group(2)}"
    else:
        index_fragment = f"file={file_base}"

    es = ES(index_fragment)

    with open(file) as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            timestamp = datetime.datetime.strptime(row['Last_Update'], '%Y-%m-%d %H:%M:%S')
            row['Last_Update'] = timestamp
            # es.post_record(convert_record(row))
            es.bulk_add(convert_record(row))
    es.bulk_write()

Code example #25
File: agent.py Project: dingidng/PARL_code_example
    def __init__(self, action_space):
        BaseAgent.__init__(self, action_space=action_space)
        self.simulate_times = 0

        unitary_action_model = UnitaryActionModel()
        algorithm = ES(unitary_action_model)
        self.unitary_es_agent = UnitaryESAgent(algorithm)

        combined_actions_model_1 = CombinedActionsModel()
        combined_actions_model_2 = CombinedActionsModel()
        ensemble_algorithm = EnsembleES(combined_actions_model_1,
                                        combined_actions_model_2)
        self.combine_es_agent = CombineESAgent(ensemble_algorithm)

        self.unitary_es_agent.restore('./saved_files',
                                      'unitary_action_model.ckpt')
        self.combine_es_agent.restore('./saved_files',
                                      'combined_actions_model.ckpt')

        unitary_actions_vec = np.load(
            "./saved_files/v6_top500_unitary_actions.npz")["actions"]
        self.unitary_actions = []
        for i in range(unitary_actions_vec.shape[0]):
            action = action_space.from_vect(unitary_actions_vec[i])
            self.unitary_actions.append(action)

        redispatch_actions_vec = np.load(
            "./saved_files/redispatch_actions.npz")["actions"]
        self.redispatch_actions = []
        for i in range(redispatch_actions_vec.shape[0]):
            action = action_space.from_vect(redispatch_actions_vec[i])
            self.redispatch_actions.append(action)

        with open("./saved_files/action_to_sub_id.pickle", "rb") as f:
            self.action_to_sub_id = pickle.load(f)

        self.after_line56_or_line45_disconnect_actions = []
        self.three_sub_action_to_sub_ids = {}

        actions_vec = np.load(
            "./saved_files/v10_merge_three_sub_actions.npz")["actions"]
        for i in range(actions_vec.shape[0]):
            action = action_space.from_vect(actions_vec[i])
            self.after_line56_or_line45_disconnect_actions.append(action)

        with open("saved_files/three_sub_action_to_sub_ids.pickle", "rb") as f:
            self.three_sub_action_to_sub_ids = pickle.load(f)

        self.used_combine_actions = False
        self.redispatch_cnt = 0
        self.max_redispatch_cnt = 3
        self.serial_actions = []

        self.do_nothing_action = action_space({})
        self.action_space = action_space

        offset = 59
        self.action_to_sub_topo = {}
        for sub_id, sub_elem_num in enumerate(action_space.sub_info):
            self.action_to_sub_topo[sub_id] = (offset, offset + sub_elem_num)
            offset += sub_elem_num

        self.observation = None

        self.redispatch_months = set([3])
Code example #26
from es import ES
from config import *


es = ES().getES()

if es.indices.exists(index = index_name):
	script = {
		'script': 'ctx._source.category=\"Programming\"'
	}
	es.update(index = index_name, doc_type = doc_type, body = script, id = '123', ignore = 404)
	# script	= {"script"	:"ctx._source.category+=tag",
	# 	"params":{
	# 	"tag": "Python"
	# }
	# es.update(index = index_name, doc_type = doc_type, body = script, id = '1', ignore=404)
Code example #27
def main():
    ## create index
    es = ES()
    if FLAGS_FIRST_RUN:
        if es.check_existing_index(index_name=FLAGS_CORPUS_NAME,
                                   delete_existing=False):
            es.create_skipgram2eid_index(index_name=FLAGS_CORPUS_NAME,
                                         type_name="skipgram2eid")
            es.create_eid2skipgram_index(index_name=FLAGS_CORPUS_NAME,
                                         type_name="eid2skipgram")
            es.create_eid2eid_index(index_name=FLAGS_CORPUS_NAME,
                                    type_name="eid2eid")

        start = time.time()
        skipgram2id, skipgram2eidcounts, eid2skipgramcounts = util.load_skipgram2eidcounts(
            eidSkipgramFilePath)
        end = time.time()
        print("[INFO] Loading data using time %s (seconds)" % (end - start))

        start = time.time()
        eid2eid_w_strength = util.calculateEidSimilarity(skipgram2eidcounts)
        end = time.time()
        print("[INFO] Calculating eid-eid similarity using time %s (seconds)" %
              (end - start))

        es.index_skipgram2eid(index_name=FLAGS_CORPUS_NAME,
                              type_name="skipgram2eid",
                              skipgram2id=skipgram2id,
                              skipgram2eidcounts=skipgram2eidcounts)

        es.index_eid2skipgram(index_name=FLAGS_CORPUS_NAME,
                              type_name="eid2skipgram",
                              eid2skipgramcounts=eid2skipgramcounts)
        es.index_eid2eid(index_name=FLAGS_CORPUS_NAME,
                         type_name="eid2eid",
                         eid2eid_w_strength=eid2eid_w_strength)

        es.match_all(index_name=FLAGS_CORPUS_NAME, type_name="skipgram2eid")
        es.match_all(index_name=FLAGS_CORPUS_NAME, type_name="eid2skipgram")
        es.match_all(index_name=FLAGS_CORPUS_NAME, type_name="eid2eid")

    eid2ename, ename2eid = util.loadEidToEntityMap(eidEnameFilePath)
    eid2types = util.loadEidToTypeMap(eidTypeFilePath, ename2eid=ename2eid)

    # userInput = ["NBA", "NCAA", "NFL"] # sports league, good performance
    userInput = ["BBC", "HBO", "CNN", "Fox",
                 "Channel 4"]  # TV Channel, good performance
    # userInput = ["Twitter", "Microsoft", "Lenovo", "Toyota", "Qualcomm"] # company, good performance
    # userInput = ["Toyota", "Hyundai", "Mazda", "Chrysler", "Ford"] # car company (top-30, avg.rank=10), good performance
    # userInput = ["Google", "Facebook", "Microsoft", "Amazon", "Twitter"] # high tech company, good performance
    #
    # userInput = ["United States", "China", "Japan", "germany", "England", "Russia", "India"] # country, using dist.sim
    # userInput = ["Illinois", "Texas", "California", "Ohio", "Maryland"] # state, using dist.sim

    seedEidsWithConfidence = [(ename2eid[ele.lower()], 0.0)
                              for ele in userInput]
    negativeSeedEids = set()
    params = SetExpanParams(index_name=FLAGS_CORPUS_NAME,
                            max_iter=10,
                            ensemble_batch=10,
                            num_of_top_skipgrams=150,
                            num_of_top_candidate_eids=50,
                            feature_subset_size_ratio=0.8,
                            average_rank=10,
                            skipgramDistLower=3,
                            skipgramDistUpper=30,
                            use_type=False)

    start = time.time()
    (expanded_eids, stop_iter) = setExpan(es,
                                          seedEidsWithConfidence,
                                          negativeSeedEids,
                                          eid2ename,
                                          eid2types,
                                          params,
                                          FLAGS_DEBUG=False)
    end = time.time()
    print("[INFO!!!] Finish SetExpan++ in %s seconds" % (end - start))
    for ele in expanded_eids:
        print(ele[0], eid2ename[ele[0]], ele[1])
Code example #28
from config import *
from es import ES
import pprint 

prt = pprint.PrettyPrinter(indent=1)

es = ES().getES()
query = {
	"query":{
		"match_all":{}
	}
}

response = es.search(index = index_name, doc_type = doc_type, 
	body = query, size = 10, request_timeout = 10)

prt.pprint(response)

Code example #29
"""
Run and plot the ES results for Ackley
"""
from es import ES
import helper


total_data = None
for h in range(30):
    print(h)
    es = ES(limits=[15.0]*30)
    
    # one search run per iteration; results are accumulated and averaged over the 30 runs below
    data = es.search(1000, 1e-5)
    
    if not total_data:
        total_data = [[], [],[],[], []]
        total_data[0] = data[0]
        total_data[1] = data[1]
        total_data[2] = data[2]
        total_data[3] = data[3]
        total_data[4] = data[4]
    else:
        for i in range(len(total_data[0])):
            total_data[1][i] += data[1][i]
            total_data[2][i] += data[2][i]
            total_data[3][i] += data[3][i]
            total_data[4][i] += data[4][i]

for i in range(len(total_data[0])):
    total_data[1][i] /= 30
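The docstring mentions plotting, but the plotting code is not shown. A minimal sketch, assuming total_data[0] holds the x-axis values (e.g., evaluation counts) and total_data[1] the fitness series averaged above, could be:

import matplotlib.pyplot as plt

# Sketch only: the meaning of the five series returned by es.search() is an
# assumption; series 1 is treated here as the averaged fitness curve.
plt.plot(total_data[0], total_data[1])
plt.xlabel("evaluations")
plt.ylabel("fitness (mean of 30 runs)")
plt.title("ES on Ackley")
plt.show()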
Code example #30
class Engine:
    def __init__(self, sc, data_path, tmdb_key):
        self.sc = sc
        self.sqlContext = SQLContext(self.sc)
        self.data_path = data_path
        self.es = ES(self.sc, self.sqlContext)
        tmdb.API_KEY = tmdb_key
        self.tmdb_key = tmdb_key

    def load_data_from_file(self):
        ratings_file_path = os.path.join(self.data_path, 'ratings.csv')
        ratings_raw_RDD = self.sc.textFile(ratings_file_path)
        ratings_header = ratings_raw_RDD.take(1)[0]
        rating_schema = StructType(\
        [StructField('userId', IntegerType(), True),\
        StructField('movieId', IntegerType(), True),\
        StructField('rating', FloatType(), True)]\
        )
        ratings_RDD = ratings_raw_RDD.filter(
            lambda line: line != ratings_header).map(
                lambda line: line.split(",")).map(
                    lambda x: (int(x[0]), int(x[1]), float(x[2]))).cache()
        self.ratings_RDD = ratings_RDD
        rating_df = self.sqlContext.createDataFrame(ratings_RDD, rating_schema)
        self.rating_df = rating_df

        ratings_dict_RDD = rating_df.rdd.map(lambda item:
                                             (item['movieId'], {
                                                 'userId': item['userId'],
                                                 'movieId': item['movieId'],
                                                 'rating': item['rating']
                                             }))
        #logger.info(ratings_dict_RDD.take(10))
        self.ratings_dict_RDD = ratings_dict_RDD

        movie_schema = StructType(\
        [StructField('movieId', IntegerType(), True),\
        StructField('title', StringType(), True),\
        StructField('genres', StringType(), True)]\
        )

        movies_file_path = os.path.join(self.data_path, 'movies.csv')
        movies_raw_RDD = self.sc.textFile(movies_file_path)
        movies_header = movies_raw_RDD.take(1)[0]
        movies_header_list = movies_header.split(",")

        self.movies_RDD = movies_raw_RDD.filter(
            lambda line: line != movies_header).map(lambda line: line.split(
                ",")).map(lambda x: (int(x[0]), x[1], x[2])).cache()

        movies_df = self.sqlContext.createDataFrame(self.movies_RDD,
                                                    movie_schema)
        self.movies_df = movies_df

        movies_dict_RDD = movies_df.rdd.map(lambda item:
                                            (item['movieId'], {
                                                'movieId': item['movieId'],
                                                'title': item['title'],
                                                'genres': item['genres']
                                            }))
        self.movies_dict_RDD = movies_dict_RDD

        self.rank = 10
        self.iterations = 10
        self.train()

        #TODO Get the image by using tmdb API and links.csv
        links_schema = StructType(\
        [StructField('movieId', IntegerType(), True),\
        StructField('imdbId', IntegerType(), True),\
        StructField('tmdbId', IntegerType(), True)]\
        )

        links_file_path = os.path.join(self.data_path, 'links.csv')
        links_raw_RDD = self.sc.textFile(links_file_path)
        links_header = links_raw_RDD.take(1)[0]
        links_header_list = links_header.split(",")

        self.links_RDD = links_raw_RDD.filter(
            lambda line: line != links_header).map(lambda line: line.split(
                ",")).map(lambda x: (int(x[0]), x[1], x[2])).cache()

        links_df = self.sqlContext.createDataFrame(self.links_RDD,
                                                   links_schema)
        self.links_df = links_df

        links_dict_RDD = links_df.rdd.map(lambda item:
                                          (item['movieId'], {
                                              'movieId': item['movieId'],
                                              'imdbId': item['imdbId'],
                                              'tmdbId': item['tmdbId']
                                          }))

    def get_predicted_rating(self, userId, movieId):
        predicted_rating_RDD = self.model.predict(userId, movieId)
        logger.info(predicted_rating_RDD)
        return predicted_rating_RDD

    def get_predicted_rating_from_file(self, file_name):
        data = self.sc.textFile(file_name)
        ratings = data.map(lambda l: l.split(',')).map(
            lambda l: Rating(int(l[0]), int(l[1]), float(l[2])))

        testdata = ratings.map(lambda p: (p[0], p[1]))
        predictions = self.model.predictAll(testdata).map(lambda r:
                                                          ((r[0], r[1]), r[2]))
        ratesAndPreds = ratings.map(lambda r: ((r[0], r[1]), r[2])).join(
            predictions)
        RMSE = math.sqrt(
            ratesAndPreds.map(lambda r: (r[1][0] - r[1][1])**2).mean())
        logger.info("RMSE = " + str(RMSE))
        #logger.info(predictions.collect())
        return predictions.collect()

    def get_es_ratingRDD(self):
        ratings = self.es.get_ratingRDD()
        return ratings

    def get_es_ratingRDD_by_userId(self, userId):
        ratings = self.es.get_ratingRDD_by_userId(userId)
        return ratings

    def get_es_ratingRDD_by_movieId(self, movieId):
        ratings = self.es.get_ratingRDD_by_movieId(movieId)
        return ratings

    def get_es_ratingRDD_by_user_movie(self, userId, movieId):
        ratings = self.es.get_ratingRDD_by_user_movie(userId, movieId)
        return ratings

    def search_movie_tmdb(self, movie_name):
        search = tmdb.Search()
        response = search.movie(query=movie_name)
        #logger.info(response)
        data_list = []
        for s in search.results:
            data = {
                'title': s['title'],
                'date': s['date'],
                'popularity': s['popularity'],
                'id': s['id']
            }
            #logger.info(data)
            data_list.append(data)
        result = {'response': response, 'data': data_list}
        return result

    def create_es_index(self):
        self.es.create_index("movielens")

    def save_to_es(self):
        self.rating_df.write.format("es").save("movielens/ratings")
        self.movies_df.write.format("es").save("movielens/movies")
        self.links_df.write.format("es").save("movielens/links")

    def save_to_es_hadoop(self):
        es_write_conf = {
            "es.nodes": 'localhost',
            "es.port": '9200',
            "es.resource": 'movielens/ratings',
            #"es.input.json" : "yes"
            "es.mapping.id": "movieId"
        }
        self.ratings_dict_RDD.saveAsNewAPIHadoopFile(
            path='-',
            outputFormatClass="org.elasticsearch.hadoop.mr.EsOutputFormat",
            keyClass="org.apache.hadoop.io.NullWritable",
            valueClass="org.elasticsearch.hadoop.mr.LinkedMapWritable",
            conf=es_write_conf)
        return True

    def train(self):
        self.model = ALS.train(self.ratings_RDD, self.rank, self.iterations,
                               0.01)
        logger.info("ALS model")

    def topN_ratings_unrated_movies(self, user_id, count):
        unrated = self.ratings_RDD.filter(lambda x: not x[0] == user_id).map(
            lambda x: (user_id, x[1])).distinct()
        predicted_RDD = self.model.predictAll(unrated)
        total_RDD = self.ratings_RDD.union(predicted_RDD)
        list_predict_movie = predicted_RDD.map(
            lambda x: x.product).distinct().collect()
        predicted_movie_RDD = total_RDD.filter(
            lambda x: x[1] in list_predict_movie)

        predicted_groupby_product_rating_RDD = predicted_movie_RDD.map(
            lambda x: (x[1], x[2])).groupByKey()
        product_avgRating_count_RDD = predicted_groupby_product_rating_RDD.map(
            get_product_avgRating_count)

        filtered_RDD = product_avgRating_count_RDD.filter(
            lambda x: x[1][0] > 3 and x[1][1] > 30)
        ratings_list = filtered_RDD.takeOrdered(count, key=lambda x: -x[1][0])
        ratings_movie_id = [x[0] for x in ratings_list]
        movie_title_dict = self.es.get_movieTitleByMovieId(ratings_movie_id)
        result_list = [(x[0], movie_title_dict[x[0]], x[1][0], x[1][1])
                       for x in ratings_list]
        result = []
        for x in result_list:
            y = {"movieId": x[0], "title": x[1], "rating": x[2], "count": x[3]}
            result.append(y)
        return result
Code example #31
class BaseResultsAnalyzer(object):
    def __init__(self,
                 es_index,
                 es_doc_type,
                 send_email=False,
                 email_recipients=(),
                 email_template_fp="",
                 query_limit=1000,
                 logger=None):
        self._es = ES()
        self._conf = self._es._conf
        self._es_index = es_index
        self._es_doc_type = es_doc_type
        self._limit = query_limit
        self._send_email = send_email
        self._email_recipients = email_recipients
        self._email_template_fp = email_template_fp
        self.log = logger if logger else log

    def get_all(self):
        """
        Get all the test results in json format
        """
        return self._es.search(index=self._es_index, size=self._limit)

    def get_test_by_id(self, test_id):
        """
        Get test results by test id
        :param test_id: test id created by performance test
        :return: test results in json format
        """
        if not self._es.exists(
                index=self._es_index, doc_type=self._es_doc_type, id=test_id):
            self.log.error('Test results not found: {}'.format(test_id))
            return None
        return self._es.get(index=self._es_index,
                            doc_type=self._es_doc_type,
                            id=test_id)

    def _test_version(self, test_doc):
        if test_doc['_source'].get('versions'):
            for v in ('scylla-server', 'scylla-enterprise-server'):
                k = test_doc['_source']['versions'].get(v)
                if k:
                    return k

        self.log.error('Scylla version is not found for test %s',
                       test_doc['_id'])
        return None

    def render_to_html(self, results, html_file_path=""):
        """
        Render analysis results to html template
        :param results: results dictionary
        :param html_file_path: Boolean, whether to save html file on disk
        :return: html string
        """
        self.log.info("Rendering results to html using '%s' template...",
                      self._email_template_fp)
        loader = jinja2.FileSystemLoader(
            os.path.dirname(os.path.abspath(__file__)))
        env = jinja2.Environment(loader=loader, autoescape=True)
        template = env.get_template(self._email_template_fp)
        html = template.render(results)
        if html_file_path:
            with open(html_file_path, "w") as f:
                f.write(html)
            self.log.info("HTML report saved to '%s'.", html_file_path)
        return html

    def send_email(self, subject, content, html=True, files=()):
        if self._send_email and self._email_recipients:
            self.log.debug('Send email to {}'.format(self._email_recipients))
            em = Email()
            em.send(subject,
                    content,
                    html=html,
                    recipients=self._email_recipients,
                    files=files)
        else:
            self.log.warning(
                "Won't send email (send_email: %s, recipients: %s)",
                self._send_email, self._email_recipients)

    def gen_kibana_dashboard_url(self, dashboard_path=""):
        return "%s/%s" % (self._conf.get('kibana_url'), dashboard_path)

    def check_regression(self):
        raise NotImplementedError("check_regression should be implemented!")
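A minimal usage sketch for the class above; the index, doc type, and test id are hypothetical placeholders, not values from the original project.

# Hypothetical index/doc_type/test id; real values come from the surrounding project.
analyzer = BaseResultsAnalyzer(es_index='performance_tests',
                               es_doc_type='test_stats',
                               send_email=False)
doc = analyzer.get_test_by_id('abc123')
if doc:
    print(analyzer._test_version(doc))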
Code example #32
def search(request):
    try:
        logging.info("search(request)")
        #pdb.set_trace()
        # current page
        if 'current_page' in request.POST:
            current_page = int(request.POST['current_page'])
        else:
            current_page = 0

        # websites the articles were crawled from
        webMap = {
            "xlw": "新浪网",
            "xhs": "新华社",
            "fhw": "凤凰网"
        }  #"rmw":"人民网","zhw":"中华网",
        if 'webs' in request.POST:
            str_webs = request.POST['webs']
            str_webs = str_webs.split(",")
            webs = []
            for i in range(len(str_webs)):
                key = str_webs[i]
                webs.append(webMap[key])
        else:
            webs = ["新浪网", "新华网", "凤凰网"]  #"人民网", "中华网",
            str_webs = ["xlw", "xhs", "fhw"]  #"rmw","zhw",

        # tags
        if "tags" in request.POST:
            str_tags = request.POST['tags']
            print str_tags
            tags = str_tags.split(",")
        else:
            tags = []

        if '' in tags:
            tags.remove('')
        print "tags:", tags

        # number of items displayed per page
        if "page_size" in request.POST:
            page_size = request.POST['page_size']
            page_size = int(page_size)
        else:
            page_size = 20

        if "timerange" in request.POST:
            timerange = request.POST['timerange']
            timerange = timerange.split(" - ")
            startTime = timerange[0]
            endTime = timerange[1]
        else:
            startTime = '2017-01-11'
            endTime = '2017-01-15'
            timerange = startTime + " - " + endTime

        # whether to deduplicate
        if "article_db" in request.POST:
            article_db = request.POST['article_db']
            article_db = int(article_db)
        else:
            article_db = 0

        if "label_states" in request.POST:
            label_states = request.POST['label_states']
            str_label_states = label_states.split(",")
            label_state = []
            for i in range(len(str_label_states)):
                label_state.append(int(str_label_states[i]))
        else:
            label_state = [0, 1, 2]
        print "label_state:", label_state

        if "timerange_check" in request.POST:
            timerange_check = request.POST["timerange_check"]
            timerange_check = int(timerange_check)
        else:
            timerange_check = 0

        print "timerange_check", timerange_check

        if "label" in request.POST:
            label = request.POST['label']
            str_labels = label.split(",")
            label = []
            for i in range(len(str_labels)):
                label.append(int(str_labels[i]))
        else:
            label = [0, 1]

        print "label", label
        if "search_key" in request.POST:
            search_key = request.POST['search_key'].strip()
        else:
            search_key = None

        if "search_type" in request.POST:
            search_type = request.POST['search_type'].strip()
        else:
            search_type = "simple_search"

        print "search_key:", search_key
        print "search_type", search_type

        #label = labelMap[label]

        user = request.session.get('user', default=None)
        if 1 in label_state and user['role'] == "1":
            condition = {
                "article_source": webs,
                "article_db": article_db,
                "article_label_state": label_state,
                "startTime": startTime,
                "endTime": endTime,
                "current_page": current_page,
                "page_size": page_size,
                "update_student": user["username"],
                "article_label": label,
                "tags": tags,
                "timerange_check": timerange_check,
                "search_type": search_type
            }
        else:
            condition = {
                "article_source": webs,
                "article_db": article_db,
                "article_label_state": label_state,
                "startTime": startTime,
                "endTime": endTime,
                "current_page": current_page,
                "page_size": page_size,
                "article_label": label,
                "tags": tags,
                "timerange_check": timerange_check,
                "search_type": search_type
            }

        print "condition:", condition
        logging.info("[search] condition=" + str(condition))

        system_setting = SystemSetting()
        # databases = system_setting.get("databases", "mongodb")
        # print "databases:",databases

        #pdb.set_trace()
        if search_key == None or search_key.strip() == "":
            print "mongodb "
            articleDAO = ArticleDAO('articles_testN')
            articleList = articleDAO.article_search_list(condition)
        else:
            print "elastic search "
            es = ES()
            if search_type == "simple_search":
                articleList = es.article_simple_search(condition, search_key)
            else:
                articleList = es.article_search_list(condition, search_key)

        logging.info("[search] len(result)=" + str(len(articleList)))
    except BaseException, e:
        logging.error(e)
        print e
        print traceback.print_exc()
        articleList = []
Code example #33
from es import ES
from config import *
import time

es = ES().getES()

if not es.indices.exists(index = index_name):
	print 'index does not exist, creating a new index'
	es.indices.create(index = index_name, body = body)
	time.sleep(2)
	print 'index created successfully'
else:
	print 'An index with this name already exists'

doc1 = {
	'name': 'Erlang',
	'category': ['Distribute','OTP','Erlang','Elixir','Elm','Actor'],
	'Publication': 'Ericsson',
	'Publishing Date': '1970-01-01'
}
es.index(index = index_name, doc_type = doc_type, body = doc1, id = '123')

response = es.get(index= index_name, doc_type = doc_type, id = '123', ignore= 404)
print response
Code example #34
from crawler import Crawler
from es import ES

# given seed URLs, our topic is "Catholic Church"
seed_urls = [
    "http://en.wikipedia.org/wiki/Catholic_Church",
    "http://en.wikipedia.org/wiki/Christianity",
    "http://en.wikipedia.org/wiki/Ten_Commandments_in_Catholic_theology"
]

# crawler
crawler = Crawler()
crawler.initialize(seed_urls)
crawler.crawl_control()

# merge indexes
my_es = ES()
my_es.initialize()
my_es.es_control()
Code example #35
File: index.py Project: mazc2121/boolean-nets
model = Model()

model.push(XnorDense(INPUT_SIZE, NUM_UNITS))
model.push(XnorDense(NUM_UNITS, OUTPUT_SIZE))

normal_model = Model()

normal_model.push(Dense(INPUT_SIZE, NUM_UNITS))
normal_model.push(Dense(NUM_UNITS, NUM_UNITS))
normal_model.push(Dense(NUM_UNITS, OUTPUT_SIZE, activation='softmax'))

# opt = GA(pop_size=POP_SIZE, num_parents=NUM_PARENTS, \
#  fitness_func=log_loss, rand_func=normal_rand, mutation_func=normal_mutation)

opt = ES(pop_size=POP_SIZE, fitness_func=log_loss, rand_func=sparse_rand)

ini_idx = 0
end_idx = BATCH_SIZE

while ini_idx < y_train.shape[0]:
    batch_xs = x_train[ini_idx:end_idx]
    batch_ys = y_train[ini_idx:end_idx]

    opt.fit(normal_model, batch_xs, batch_ys)

    normal_model.set_params(opt.best)

    pred = normal_model.forward(x_test)

    pred = np.argmax(pred, axis=1)