import os
from pathlib import PosixPath
from unittest import TestCase

from pymongo import MongoClient

from json_handler import JsonHandler


class TestJSON(TestCase):
    def setUp(self):
        for f in "testfile1.json", "testfile2.JSON", "fakefile.txt":
            with open(f, "w") as handle:
                handle.write('{"a": 1, "b": 2}')
        self.j = JsonHandler(".", "unittest", "unittest")
        self.client = MongoClient()
        self.client.drop_database("unittest")
        self.coll = self.client["unittest"]["unittest"]

    def test_can_make_pathlib_object_of_json_files(self):
        self.assertIn(PosixPath("testfile1.json"), self.j.jsons)
        self.assertIn(PosixPath("testfile2.JSON"), self.j.jsons)
        self.assertNotIn("fakefile.txt", self.j.jsons)

    def test_can_parse_json(self):
        self.assertEqual(self.j.parse_one_json(self.j.jsons[0]), {"a": 1, "b": 2})

    def test_can_put_files_in_database(self):
        # count_documents replaces Collection.count()/Cursor.count(), which
        # were removed in PyMongo 4.
        self.assertEqual(self.coll.count_documents({}), 0)
        self.j.put_jsons_in_database(self.j.jsons, self.j.db_name, self.j.coll_name)
        self.assertEqual(self.coll.count_documents({}), 2)

    def test_inserted_docs_can_be_retrieved(self):
        self.j.put_jsons_in_database(self.j.jsons, self.j.db_name, self.j.coll_name)
        self.assertEqual(self.coll.count_documents({"a": 1}), 2)

    def tearDown(self):
        for f in "testfile1.json", "testfile2.JSON", "fakefile.txt":
            os.remove(f)
        self.client.drop_database("unittest")
        self.client.close()
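# The tests above pin down JsonHandler's surface: a `jsons` list of PosixPath
# objects, parse_one_json(), put_jsons_in_database(), and db_name/coll_name
# attributes. A minimal sketch that would satisfy them -- inferred from the
# assertions, not a confirmed implementation:
import json
from pathlib import Path

from pymongo import MongoClient


class JsonHandler:
    def __init__(self, directory, db_name, coll_name):
        self.db_name = db_name
        self.coll_name = coll_name
        # Match the .json suffix case-insensitively so testfile2.JSON is
        # included while fakefile.txt is not.
        self.jsons = [p for p in Path(directory).iterdir()
                      if p.suffix.lower() == ".json"]

    def parse_one_json(self, path):
        with open(path) as handle:
            return json.load(handle)

    def put_jsons_in_database(self, jsons, db_name, coll_name):
        client = MongoClient()
        client[db_name][coll_name].insert_many(
            self.parse_one_json(p) for p in jsons)
        client.close()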
    def do_GET(self):
        if self.path == "/api/data/total_faces":
            self._set_response()
            self.wfile.write(JsonHandler(filename).get_data_from_file("totalFaces"))
        elif self.path == "/api/data/avg_age":
            self._set_response()
            self.wfile.write(JsonHandler(filename).get_data_from_file("currentAverageAge"))
        elif self.path == "/api/data/parity":
            self._set_response()
            self.wfile.write(JsonHandler(filename).get_data_from_file("parity"))
        elif self.path == "/api/data/expressions":
            self._set_response()
            self.wfile.write(JsonHandler(filename).get_data_from_file("expressions"))
    def do_POST(self):
        if self.path == "/compute_stats":
            content_length = int(self.headers['Content-Length'])  # <--- Gets the size of data
            post_data = self.rfile.read(content_length)  # <--- Gets the data itself
            self._set_response()
            JsonHandler(filename).process_data_from_json(post_data)
            self.wfile.write(post_data)
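# The do_GET/do_POST handlers above reference a _set_response helper and a
# module-level `filename` that are not shown. A minimal sketch of that missing
# plumbing, assuming the handler subclasses http.server.BaseHTTPRequestHandler
# and that get_data_from_file returns bytes (both are assumptions):
from http.server import BaseHTTPRequestHandler, HTTPServer

filename = "stats.json"  # hypothetical path to the stats file


class StatsHandler(BaseHTTPRequestHandler):
    def _set_response(self):
        # Send a 200 with a JSON content type before writing the body.
        self.send_response(200)
        self.send_header("Content-Type", "application/json")
        self.end_headers()

    # ... do_GET and do_POST as defined above ...


if __name__ == "__main__":
    HTTPServer(("", 8000), StatsHandler).serve_forever()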
def mc_worker(jobs, stats, ctl, store, timeout=5):
    logging.info("mc_worker started")
    while ctl["run_ok"]:
        try:
            root, parents, val = jobs.get(block=True, timeout=timeout)
        except Queue.Empty:
            logging.debug("mc_worker hasn't received jobs for %s seconds" % timeout)
            continue
        start = time.time()
        for server in val:
            try:
                ip, port = server.split(":")
            except (ValueError, AttributeError), e:
                logging.error("unable to collect mc stats from %s : %s" % (server, e))
                continue
            mc_server = Server(ip)
            # get bucket name from root and parent nodes
            bucket = DataHelper.get_bucket(root, parents)
            # initialize memcached source
            mc_source = MemcachedSource(mc_server, bucket)
            # initialize handlers to dump data json doc
            j_handler = JsonHandler()
            s_handler = SerieslyHandler(store)
            # collect data from source and emit to handlers
            mc_coll = MemcachedCollector([mc_source], [j_handler, s_handler])
            mc_coll.collect()
            mc_coll.emit()
            stats.put([mc_source.fast, mc_source.meta], block=True)
            stats.put([mc_source.slow, mc_source.meta], block=True)
        delta = time.time() - start
        logging.debug("collected mc stats from %s, took %s seconds" % (val, delta))
        if delta < timeout:
            logging.debug("mc_worker sleep for %s seconds" % (timeout - delta))
            time.sleep(timeout - delta)
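# mc_worker is written to be driven by shared queues and a control dict. A
# hedged sketch of how such a worker might be launched -- the Manager/Process
# wiring below is an assumption for illustration, not taken from the source:
import multiprocessing


def start_mc_worker(store):
    manager = multiprocessing.Manager()
    jobs = manager.Queue()
    stats = manager.Queue()
    ctl = manager.dict({"run_ok": True})
    worker = multiprocessing.Process(
        target=mc_worker, args=(jobs, stats, ctl, store))
    worker.start()
    return jobs, stats, ctl, worker

# Producers then enqueue (root, parents, ["host:port", ...]) tuples on `jobs`,
# and setting ctl["run_ok"] = False lets the worker loop exit cleanly.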
async def init(loop, args):
    if args.tags:
        tags = args.tags.split(',')
    else:
        tags = [Model.default_tag]
    model = Model(loop)
    if args.model:
        await model.set_model(args.model, tags)
    batcher = Batcher(model, loop, args.batch_size)
    web_app = web.Application(loop=loop, client_max_size=args.request_size)
    web_app.on_shutdown.append(on_shutdown)
    web_app.router.add_get('/stats', batcher.stats_handler)
    json_handler = JsonHandler(model, batcher, args.batch_transpose)
    if args.no_cors:
        web_app.router.add_get('/', batcher.info_handler)
        web_app.router.add_post('/{method}', json_handler.handler)
        web_app.router.add_post('/', json_handler.handler)
    else:
        cors = aiohttp_cors.setup(web_app, defaults={
            "*": aiohttp_cors.ResourceOptions(
                allow_credentials=True,
                expose_headers="*",
                allow_headers="*")
        })
        get_resource = cors.add(web_app.router.add_resource('/'))
        cors.add(get_resource.add_route("GET", batcher.info_handler))
        post_resource = cors.add(web_app.router.add_resource('/{method}'))
        cors.add(post_resource.add_route("POST", json_handler.handler))
        post_resource = cors.add(web_app.router.add_resource('/'))
        cors.add(post_resource.add_route("POST", json_handler.handler))
    if args.static_path:
        web_app.router.add_static('/web/', path=args.static_path, name='static')
    grpc_app = Server([GrpcHandler(model, batcher)], loop=loop)
    return web_app, grpc_app
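# init() returns both an aiohttp application and a grpclib Server. A hedged
# sketch of driving the two from a main() -- the port numbers and the runner
# wiring are assumptions for illustration only:
import asyncio


def main(args):
    loop = asyncio.get_event_loop()
    web_app, grpc_app = loop.run_until_complete(init(loop, args))
    # Serve HTTP and gRPC side by side on separate ports.
    runner = web.AppRunner(web_app)
    loop.run_until_complete(runner.setup())
    loop.run_until_complete(web.TCPSite(runner, '0.0.0.0', 8080).start())
    loop.run_until_complete(grpc_app.start('0.0.0.0', 50051))
    try:
        loop.run_forever()
    finally:
        loop.run_until_complete(runner.cleanup())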
    parser.add_option('-j', '--jsonFile', dest='json_file')
    options, otherjunk = parser.parse_args(argv)
    return options


options = parse_commands(sys.argv[1:])
header_list = [
    "person_id", "level", "chapterIndex", "contentIndex", "questionIndex",
    "derivedIndex", "duration", "point", "clearDateTime",
    "incorrectAnswerCount"
]
rest_handler = RestHandler()
json_handler = JsonHandler()
csv_handler = CsvHandler(filepath=options.drag_file, header_list=header_list)
#f = open(options.json_file, 'w')
date_list_json = rest_handler.get_json_of_date_list()
date_list = json_handler.json_to_date_list(date_list_json)
for idx, date in enumerate(date_list):
    #for idx, date in enumerate(['1970-01-01','2019-02-07']):
    #result_dict_list = []
    if date == '1970/01/01':
        print('Date 1970/01/01')
        continue
    print('[{}], ({}/{}) Now Collecting'.format(date, idx + 1, len(date_list)))
    for mobile_os in ('iOS', 'Android'):
        person_list_json = rest_handler.get_json_of_person_id_by_date(
            date, mobile_os)
import sys

from rest_handler import RestHandler
from json_handler import JsonHandler
from csv_handler import CsvHandler


def parse_commands(argv):
    from optparse import OptionParser
    parser = OptionParser('"')
    parser.add_option('-o', '--userFile', dest='user_file')
    parser.add_option('-p', '--personFile', dest='person_file')
    parser.add_option('-m', '--mobileOS', dest='mobile_os')
    options, otherjunk = parser.parse_args(argv)
    return options


options = parse_commands(sys.argv[1:])
with open(options.person_file) as person_file:
    person_list = person_file.read().splitlines()

header_list = ['person_id', 'level']
rest_handler = RestHandler(mobile_os=options.mobile_os)
json_handler = JsonHandler()
csv_handler = CsvHandler(filepath=options.user_file, header_list=header_list)
for person_id in person_list:
    json_result = rest_handler.get_user_data_by_person_id(person_id)
    result_dict_list = json_handler.json_user_data_to_dict_list(json_result, person_id)
    print(result_dict_list)
    csv_handler.dict_to_csv(dict_list=result_dict_list)
import sys
from os.path import expanduser

import pandas as pd

home = expanduser("~")
sys.path.append('{}/ProjectDoBrain/codes/Modules'.format(home))
from rest_handler import RestHandler
from json_handler import JsonHandler
from csv_handler import CsvHandler


def parse_commands(argv):
    from optparse import OptionParser
    parser = OptionParser('"')
    parser.add_option('-s', '--surveyFile', dest='survey_file')
    parser.add_option('-b', '--birthdayFile', dest='birthday_file')
    parser.add_option('-m', '--mobileOS', dest='mobile_os')
    options, otherjunk = parser.parse_args(argv)
    return options


options = parse_commands(sys.argv[1:])
header_list = ['person_id', 'birthday']
survey_df = pd.read_csv(options.survey_file)
rest_handler = RestHandler(mobile_os=options.mobile_os)
json_handler = JsonHandler()
csv_handler = CsvHandler(filepath=options.birthday_file, header_list=header_list)
for person in survey_df.person_id.unique():
    json_result = rest_handler.get_user_data_by_person_id(person)
    result_dict_list = json_handler.user_json_to_birthday(person, json_result)
    csv_handler.dict_to_csv(dict_list=result_dict_list)
    parser.add_option('-s', '--scoreFile', dest='score_file')
    parser.add_option('-j', '--jsonFile', dest='json_file')
    options, otherjunk = parser.parse_args(argv)
    return options


options = parse_commands(sys.argv[1:])
header_list = [
    "person_id", "level", "chapterIndex", "contentIndex", "questionIndex",
    "derivedIndex", "duration", "point", "clearDateTime",
    "incorrectAnswerCount"
]
rest_handler = RestHandler()
json_handler = JsonHandler()
csv_handler = CsvHandler(filepath=options.score_file, header_list=header_list)
#f = open(options.json_file, 'w')
date_list_json = rest_handler.get_score_json_of_date_list()
date_list = json_handler.json_to_date_list(date_list_json)
survey_person_list = []
for mobile_os in ('iOS', 'Android'):
    survey_rest_handler = RestHandler(mobile_os=mobile_os)
    survey_json = survey_rest_handler.get_survey_data()
    temp_person_list = json_handler.json_survey_data_to_person_list(
        survey_json)
    survey_person_list += temp_person_list
survey_person_set = set(survey_person_list)
survey_person_set.add('3c234013d1ec58644fe3779b67542e45')
survey_person_set.add('48f116a8d36c91fa878653d625b75102')
import click
import os
import copy
import convert
from abc import ABCMeta
from itertools import groupby
from collections import namedtuple
from datetime import datetime

from json_handler import JsonHandler

jh = JsonHandler()
LEAGUES_DATA = jh.load_leagues()


def get_writer(output_format='stdout', output_file=None):
    return globals()[output_format.capitalize()](output_file)


class BaseWriter(object):
    __metaclass__ = ABCMeta

    def __init__(self, output_file):
        self.output_filename = output_file


class Stdout(BaseWriter):
    def __init__(self, output_file):
        super().__init__(output_file)
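# get_writer resolves the writer class by capitalizing the format name and
# looking it up in module globals, so 'stdout' maps to the Stdout class above.
# A minimal usage sketch (only Stdout is shown in this excerpt; any other
# format name would need a matching class defined in this module):
writer = get_writer('stdout')              # -> Stdout(None)
writer = get_writer('stdout', 'out.txt')   # -> Stdout('out.txt')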
class Network(object):
    def __init__(self, sizes, dataHandler, save_rate, checkpoints_dir,
                 activation_function):
        self.num_layers = len(sizes)
        self.sizes = sizes
        self.dataHandler = dataHandler
        self.layers = layer.create_layers(self.sizes, activation_function)
        self.save_rate = save_rate
        self.checkpoints_dir = checkpoints_dir
        self.json_handler = JsonHandler()

    def feedforward(self, a, keep_z=False):
        self.layers[0].activation = a.reshape(a.shape[0], 1)
        i = 1
        for layer in self.layers[1:]:
            layer.update_layer(self.layers[i - 1].activation, keep_z)
            i += 1
        return layer.activation

    def update_mini_batch(self, mini_batch, eta):
        # the nablas are shaped to match their respective layers
        nabla_b = [np.zeros(layer.bias.shape) for layer in self.layers[1:]]
        nabla_w = [np.zeros(layer.weight.shape) for layer in self.layers[1:]]
        mini_batch_length = len(mini_batch)
        # for each training example in the mini batch, compute the required
        # adjustment
        for k in xrange(mini_batch_length):
            x, y = self.dataHandler.get_example(update_batch=True)
            delta_nabla_b, delta_nabla_w = self.backprop(x, y)
            nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]  # dC/db
            nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]  # dC/dw
        for i, (nw, nb) in enumerate(zip(nabla_w, nabla_b)):
            self.layers[i + 1].weight -= (eta / mini_batch_length) * nw  # update weight
            self.layers[i + 1].bias -= (eta / mini_batch_length) * nb  # update bias

    def backprop(self, x, y):
        # feedforward: run through the network towards the last layer,
        # computing the zs and the activations
        self.feedforward(x, keep_z=True)
        # output error (computes the last layer "by hand")
        nabla_b = []
        nabla_w = []
        for l in self.layers[1:]:
            nabla_b.append(np.zeros(l.bias.shape))
            nabla_w.append(np.zeros(l.weight.shape))
        activation = self.layers[-1].activation
        z = self.layers[-1].z
        delta = self.cost_derivative(activation, y) * \
            self.layers[-1].activation_function(z, prime=True)  # (BP1)
        nabla_b[-1] = delta  # (BP3)
        nabla_w[-1] = np.dot(delta, self.layers[-2].activation.transpose())  # (BP4)
        # backpropagate the error; l grows, but since negative indexes access
        # the list back to front, the error is propagated from the end of the
        # network to the beginning
        for l in xrange(2, self.num_layers):
            z = self.layers[-l].z
            afp = self.layers[-l].activation_function(z, prime=True)
            delta = np.dot(self.layers[-l + 1].weight.transpose(), delta) * afp  # (BP2)
            nabla_b[-l] = delta  # (BP3)
            nabla_w[-l] = np.dot(
                delta, self.layers[-l - 1].activation.transpose())  # (BP4)
        return (nabla_b, nabla_w)

    def cost_derivative(self, output_activations, y):
        return (output_activations - y)

    def evaluate(self, test_data):
        # record results by running the test set through the network,
        # taking the highest output as the network's answer
        test_results = []
        for i in test_data:
            x, y = self.dataHandler.get_example(i)
            test_results.append((np.argmax(self.feedforward(x)), np.argmax(y)))
        n = len(test_data)
        hit = sum(int(x == y) for (x, y) in test_results)
        # return the hit rate
        return (float(hit) / float(n))

    def SGD(self, training_data, epochs, mini_batch_size, eta, val_data=None):
        # for each epoch, shuffle the training set, build mini batches of the
        # given size, then recompute weights and biases
        for j in xrange(epochs):
            # each mini_batch contains a list of indexes, each index
            # corresponds to an example
            mini_batches = self.dataHandler.get_mini_batches(
                minBatch_size=mini_batch_size)
            for i, mini_batch in enumerate(mini_batches):
                self.update_mini_batch(mini_batch, eta)
            if j > 0 and j % self.save_rate == 0:
                self.save_learning(self.checkpoints_dir + 'epoch' + str(j) + '.json')
            # if there is a validation set, use the current network to
            # measure the hit rate
            if val_data:
                print "Epoch {0} - hit rate: {1}".format(
                    j, self.evaluate(self.dataHandler.val_set))
            # otherwise the epoch is done and we move on to the next one
            else:
                print "Epoch {0} complete.".format(j)

    def save_learning(self, ckpt_name):
        # dict to save the params from the checkpoint
        weights = {}
        biases = {}
        for i, layer in enumerate(self.layers):
            weights['l' + str(i)] = layer.weight.tolist()
            biases['l' + str(i)] = layer.bias.tolist()
        ckpt = {"weights": weights, "biases": biases}
        self.json_handler.write(ckpt, ckpt_name)

    def load_learning(self, ckpt_file):
        # loads the parameters from the checkpoint file
        params = self.json_handler.read(ckpt_file)
        weights = params['weights']
        biases = params['biases']
        # fills the layers with the loaded parameters
        for i in xrange(len(self.layers)):
            self.layers[i].weight = np.asarray(
                weights['l' + str(i)]).astype("float64")
            self.layers[i].bias = np.asarray(biases['l' + str(i)]).astype("float64")

    def predict(self, x):
        # x must have the same size as the input layer
        self.feedforward(x)
        return np.argmax(self.layers[-1].activation)

    def weights_for_humans(self, img_dir):
        # This function saves the weights in image format, so humans can try
        # to see the magic better. For now it only works when the number of
        # neurons in a layer is a square number.
        for i, layer in enumerate(self.layers[1:]):
            k = len(layer.weight[0])
            sqrt_k = int(math.sqrt(k))
            # checks if it has a square size
            if sqrt_k * sqrt_k == k:
                img = np.zeros((k, 3), dtype="uint8")
                for j, weight in enumerate(layer.weight):
                    p = np.argwhere(weight >= 0)
                    n = np.argwhere(weight < 0)
                    img[p[:, 0], :] = np.absolute(weight[p]) * np.array(
                        [0, 255, 0])  # green
                    img[n[:, 0], :] = np.absolute(weight[n]) * np.array(
                        [0, 0, 255])  # red
                    if img_dir[-1] == '/':
                        img_name = img_dir + "l" + str(i + 1) + "w" + str(j) + ".jpg"
                    else:
                        img_name = img_dir + '/' + "l" + str(i + 1) + "w" + str(j) + ".jpg"
                    cv2.imwrite(img_name, img.reshape(sqrt_k, sqrt_k, 3))
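# A hedged usage sketch for the Network class above. The DataHandler
# constructor, the sigmoid activation name, and the input vector are
# assumptions for illustration -- only Network's own signature is taken from
# the code. Note that SGD draws examples from the dataHandler itself, so the
# training_data argument is effectively unused:
data_handler = DataHandler("mnist_train.csv")  # hypothetical data source
net = Network(sizes=[784, 30, 10],
              dataHandler=data_handler,
              save_rate=10,
              checkpoints_dir="checkpoints/",
              activation_function=sigmoid)
net.SGD(training_data=None, epochs=30, mini_batch_size=10, eta=3.0,
        val_data=True)  # val_data triggers evaluation on dataHandler.val_set
prediction = net.predict(some_input_vector)  # vector sized like the input layer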
import sys
import json

from rest_handler import RestHandler
from json_handler import JsonHandler
from csv_handler import CsvHandler


def parse_commands(argv):
    from optparse import OptionParser
    parser = OptionParser('"')
    parser.add_option('-s', '--surveyFile', dest='survey_file')
    parser.add_option('-m', '--mobileOS', dest='mobile_os')
    parser.add_option('-j', '--jsonFile', dest='json_file')
    options, otherjunk = parser.parse_args(argv)
    return options


options = parse_commands(sys.argv[1:])
f = open(options.json_file, 'w')
header_list = [
    'person_id', 'wonDiagnosis', 'worryingCategory', 'diagnosedDisease'
]
rest_handler = RestHandler(mobile_os=options.mobile_os)
json_handler = JsonHandler()
csv_handler = CsvHandler(filepath=options.survey_file, header_list=header_list)
json_result = rest_handler.get_survey_data()
f.write(json.dumps(json_result) + '\n')
result_dict_list = json_handler.json_survey_data_to_dict(json_result)
csv_handler.dict_to_csv(dict_list=result_dict_list)
f.close()
options = parse_commands(sys.argv[1:])
with open(options.person_file) as person_file:
    person_list = person_file.read().splitlines()

header_list = [
    'person_id', 'level', 'game_level', 'clear_date_time', 'Memory',
    'VelocityPerceptual', 'Numerical', 'Discrimination', 'SpacePerceptual',
    'Inference', 'Organizing', 'Creative'
]
rest_handler = RestHandler(mobile_os=options.mobile_os)
json_handler = JsonHandler()
csv_handler = CsvHandler(filepath=options.user_score_file,
                         header_list=header_list)
f = open(options.json_file, 'w')
content_num = 0
for person_id in person_list:
    try:
        json_result = rest_handler.get_user_score_data_by_person_id(person_id)
    except:
        # skip people whose score data cannot be fetched
        continue
    f.write(person_id + '\t' + json_result + '\n')
    result_dict_list = json_handler.json_user_score_data_to_dict_list(
        json_result, person_id, content_num)
    csv_handler.dict_to_csv(dict_list=result_dict_list)
f.close()
class QueryMatcher(object):
    jsonHandler = JsonHandler()
    nlp = None

    def __init__(self, nlp):
        self.nlp = nlp

    def getQuery(self, statement):
        properties = self.getProperties(statement)
        queries = self.searchPropertyMatch(properties)
        maxQueryID, maxReplaces, maxScore = self.getBestQueryProperties(
            queries, statement)
        return self.buildOriginalQuery(maxQueryID, maxReplaces), maxScore

    def getProperties(self, statement):
        doc = self.nlp(statement)
        properties = {"time": "0", "price": "0"}
        for each in doc.ents:
            if str(each.label_) == "MONEY" or str(each.label_) == "CARDINAL":
                properties["price"] = "1"
            elif str(each.label_) == "TIME":
                properties["time"] = "1"
        return properties

    def searchPropertyMatch(self, properties):
        queries = self.jsonHandler.matchProperties(properties)
        return queries

    def getBestQueryProperties(self, queries, statement):
        # do the partial matching for each query as well as the entity matching
        maxScore = 0.0
        maxQueryID = ""
        maxReplaces = None
        for ID in queries:
            totScore = self.getMaxHit(statement,
                                      self.jsonHandler.getGeneralizedQuery(ID))
            replaces = self.jsonHandler.getReplaces(ID)
            numOfReplaces = len(replaces)
            for i in range(0, len(replaces)):
                replace = replaces[i]
                score, key = self.getMaximumScoringKey(replace, statement)
                totScore += score / numOfReplaces
                replaces[i] = key
            if maxScore >= totScore:
                continue
            maxScore = totScore
            maxQueryID = ID
            maxReplaces = replaces
        return maxQueryID, maxReplaces, maxScore

    def getMaxHit(self, statement, generalizedQueries):
        maxScore = 0.0
        queryList = generalizedQueries.split(",")
        for each in queryList:
            score = fuzz.partial_ratio(statement, each)
            if score > maxScore:
                maxScore = score
        return maxScore

    def buildOriginalQuery(self, ID, replaces):
        if ID == "":
            return
        originalQuery = self.jsonHandler.getOriginalQuery(ID)
        for i in range(0, len(replaces)):
            originalQuery = originalQuery.replace("xxx" + str(i), replaces[i])
        return originalQuery

    def getMaximumScoringKey(self, replace, statement):
        if replace == "price":
            doc = self.nlp(statement)
            for ent in doc.ents:
                if str(ent.label_) == "MONEY" or str(ent.label_) == "CARDINAL":
                    return 0, self.get_first_nbr_from_str(str(ent))
            # fall back when no money/cardinal entity is found, so callers
            # can still unpack a (score, key) pair
            return 0, ""
        else:
            categorySet = replace.split(",")
            maxScore = 0.0
            maxScoreReplace = ""
            for each in categorySet:
                keySet = self.jsonHandler.getKeywordList(each)
                for eachInner in keySet:
                    score = fuzz.partial_ratio(eachInner, statement)
                    if score > maxScore:
                        maxScore = score
                        maxScoreReplace = eachInner
            return maxScore, maxScoreReplace

    def get_first_nbr_from_str(self, input_str):
        # guard: reject empty values and non-strings ("or" rather than "and",
        # otherwise an empty string slips through the check)
        if not input_str or not isinstance(input_str, str):
            return 0
        out_number = ''
        for ele in input_str:
            if (ele == '.' and '.' not in out_number) or ele.isdigit():
                out_number += ele
            elif out_number:
                break
        return out_number
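# A hedged usage sketch for QueryMatcher. It is wired to a spaCy pipeline for
# entity recognition and relies on fuzz.partial_ratio for fuzzy matching; the
# model name and the example statement are assumptions for illustration:
import spacy
from fuzzywuzzy import fuzz  # provides the partial_ratio used above

nlp = spacy.load("en_core_web_sm")
matcher = QueryMatcher(nlp)
query, score = matcher.getQuery("show me flights under 300 dollars")
print(query, score)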
sys.path.append('{}/ProjectDoBrain/codes/Modules'.format(home))
from rest_handler import RestHandler
from json_handler import JsonHandler
from csv_handler import CsvHandler


def parse_commands(argv):
    from optparse import OptionParser
    parser = OptionParser('"')
    parser.add_option('-p', '--personFile', dest='person_file')
    parser.add_option('-m', '--mobileOs', dest='mobile_os')
    options, otherjunk = parser.parse_args(argv)
    return options


# make person_id csv without HEADER
options = parse_commands(sys.argv[1:])
header_list = ["person_id"]
rest_handler = RestHandler(mobile_os=options.mobile_os)
json_handler = JsonHandler()
csv_handler = CsvHandler(filepath=options.person_file, header_list=header_list)
json_result = rest_handler.get_json_of_person_id()
result_dict_list = json_handler.json_person_id_to_dict_list(
    json_source=json_result, mobile_os=options.mobile_os)
csv_handler.dict_to_csv(dict_list=result_dict_list)