Example #1
    def generate_result_file(self, quest_ans_dict, is_test_data):

        total_un_ques = 0

        res_file_path = os.path.join(
            os.path.dirname(self.ndq_test_data.processed_data_path),
            "tqa_test-tf.json")
        res_f_handle = open(res_file_path, "w")

        json_path = os.path.dirname(
            os.path.dirname(self.ndq_test_data.processed_data_path))
        read_val_json = read_json(json_path, is_test_data)

        q_ids_list = read_val_json.get_questions_id()

        for q_id in q_ids_list:
            if quest_ans_dict.get(q_id, None) is None:
                #print "Question not present",q_id
                quest_ans_dict[q_id] = 'a'
                total_un_ques += 1

        print "Total untracked questions", total_un_ques
        ques_ans_json_data = json.dumps(quest_ans_dict, indent=4)
        res_f_handle.write(ques_ans_json_data)
        res_f_handle.close()
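
Most of these examples call a project-local read_json helper rather than json.load directly. As a rough sketch only (an assumption for orientation; each project defines its own helper with its own signature), such a function typically looks like:

import json

def read_json(name):
    # Hypothetical helper: load "<name>.json" and return the parsed object.
    path = name if name.endswith('.json') else name + '.json'
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)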
Example #2
def get_most_common_bonus_type():
    items = read_json('items')
    affixes = {}
    for item in items:
        for affix in item['affixes']:
            if 'name' in affix and 'type' in affix:
                if affix['name'] not in affixes:
                    affixes[affix['name']] = {}
                if affix['type'] not in affixes[affix['name']]:
                    affixes[affix['name']][affix['type']] = 0

                affixes[affix['name']][affix['type']] += 1

    bestBonusMap = {}

    for affix, v in affixes.items():
        maxCount = 0
        bestBonus = None  # avoid a NameError when every bonus type is excluded below

        for bonusType, count in v.items():
            if count > maxCount and bonusType not in ['Profane', 'Insight']:
                maxCount = count
                bestBonus = bonusType

        bestBonusMap[affix] = bestBonus

    return bestBonusMap
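Example #3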
    def __init__(self,
                 word2vec_path,
                 processed_data_path,
                 word_vec_size,
                 max_q_length,
                 max_option_length,
                 max_opt_count,
                 max_sent_para,
                 max_words_sent,
                 op_path=None):
        self.processed_data_path = processed_data_path
        self.raw_text_path = os.path.join(processed_data_path,
                                          "text_question_sep_files")
        if not os.path.exists(self.raw_text_path):
            read_json_data = read_json(os.path.dirname(processed_data_path))
            read_json_data.read_content()
        if op_path is None:
            op_path = os.path.join(processed_data_path, "one_hot_files")
        if not os.path.exists(op_path):
            os.makedirs(op_path)
        self.word2vec_path = word2vec_path
        self.op_path = op_path
        self.word_vec_size = word_vec_size
        self.num_of_words_in_opt = max_option_length
        self.num_of_words_in_question = max_q_length
        self.num_of_sents_in_closest_para = max_sent_para
        self.num_of_words_in_sent = max_words_sent
        self.num_of_words_in_closest_sentence = max_sent_para * max_words_sent
        self.num_of_options_for_quest = max_opt_count
        self.folder_list = self.get_list_of_dirs(self.raw_text_path)
        self.unknown_words_vec_dict = None
        self.unknown_words_vec_dict_file = "unk_word2vec_dict.pkl"
        self.common_files_path = "../common_files"
        if not os.path.exists(self.common_files_path):
            os.makedirs(self.common_files_path)
Example #4
def get_inverted_synonym_map():
    synData = read_json('affix-synonyms')

    out = {}
    for syn in synData:
        for name in syn['synonyms']:
            out[name] = syn['name']
    return out
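
A quick usage illustration for the inverted map (hypothetical affix-synonyms entries, not from the source data):

# synData = [{"name": "Deadly", "synonyms": ["Deadly", "Exceptional Deadly"]}]
# get_inverted_synonym_map() -> {"Deadly": "Deadly", "Exceptional Deadly": "Deadly"}

Example #5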
def parse_item_pages():
    sets = read_json('sets')

    cachePath = "./cache/items/"
    items = []
    for file in os.listdir(cachePath):
        if include_page(file):
            items.extend(get_items_from_page(cachePath + file, sets))

    items.sort(key=lambda x: x['name'])

    write_json(items, 'items')
Example #6
    def __init__(self, cfg_file, cfg_name=None):

        self._cfg = rj.read_json(cfg_file)

        if cfg_name and cfg_name in self._cfg:
            self._cfg = self._cfg[cfg_name]

        dict.__init__(self, self._cfg)

        self.__initialized = True  # see self.__setattr__()

        set_test_config(self)
Example #7
    def test_read_json(self):
        """
        read_json function returns a dictionary with
        Key: neighborhood
        Value: list of (latitude, longitude)

        We should expect to have 195 neighborhoods in New York City
        """

        result = read_json("NYC_Shapes.json")
        neighborhood = list(result.keys())  # list(), not [result.keys()], so len counts the keys
        self.assertEqual(len(neighborhood), 195)
Example #8
def parse_minor_artifacts():
    items = read_json('items')

    page = open('./cache/Minor_Artifact.html', "r", encoding='utf-8').read()

    soup = BeautifulSoup(page, 'html.parser')

    artifacts = get_artifacts_from_page(soup)

    for item in items:
        if item['name'] in artifacts:
            item['artifact'] = True

    write_json(items, 'items')
Example #9
    def __init__(self, cfg_file, cfg_name=None):

        config = rj.read_json(cfg_file)

        if cfg_name and cfg_name in config:
            self._cfg = config[cfg_name]
        else:
            self._cfg = config

        global _test_config
        _test_config = self

        dict.__init__(self, self._cfg)

        self.changed_values = {}
        self.__initialized = True
Example #10
    def __init__(self, cfg_file, cfg_name=None):

        config = rj.read_json(cfg_file)

        if cfg_name and cfg_name in config:
            self._cfg = config[cfg_name]
        else:
            self._cfg = config

        global _test_config
        _test_config = self

        dict.__init__(self, self._cfg)

        self.changed_values = {}
        self.__initialized = True
Example #11
def run(rootdir):
    i = 0
    for parent, dirnames, filenames in os.walk(rootdir):
        for filename in filenames:
            # print(parent, filename)
            try:
                # join with the walked directory, not rootdir, so files
                # in subdirectories resolve to the correct path
                json_list = read_json(os.path.join(parent, filename))
                # print(json_list)
                for json_obj in json_list:
                    d_o = dict(json_obj)
                    d_o.update({u'insert.20160919': 1})
                    CompanyDB.upsert_company(d_o)
                    i += 1
                    print(i)
            except Exception as e:
                print("failed to import %s: %s" % (filename, e))
Example #12
def send_message_to_rabbitmq():
    data = rd.read_json()

    # Establish a connection to RabbitMQ
    connection = pika.BlockingConnection(
        pika.ConnectionParameters(host='localhost'))
    channel = connection.channel()

    channel.queue_declare(queue='dados_json')

    channel.basic_publish(
        exchange='',
        routing_key='dados_json',
        body=str(
            rd.creat_data_to_pass()))  # send the JSON data to RabbitMQ

    print(" Dados Enviados")
    connection.close()
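
For context, a minimal consumer counterpart for the 'dados_json' queue could look like the sketch below (pika's standard blocking API; not part of the original source):

import pika

def receive_message_from_rabbitmq():
    connection = pika.BlockingConnection(
        pika.ConnectionParameters(host='localhost'))
    channel = connection.channel()
    channel.queue_declare(queue='dados_json')

    def callback(ch, method, properties, body):
        # body carries the stringified JSON published above
        print(" Received:", body)

    channel.basic_consume(queue='dados_json',
                          on_message_callback=callback,
                          auto_ack=True)
    channel.start_consuming()

Example #13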
def generator_json(batch_size):
    batch_features = np.zeros((batch_size, 1, 84, 84))
    batch_segment = np.zeros((batch_size, 1, 84, 84))
    while True:
        for i in range(batch_size):
            # image_paths is a module-level list of .png file paths
            image_name = random.choice(image_paths)
            json_path = image_name.replace('.png', '.json')
            segment, gray = read_json(json_path)
            gray = cv2.resize(gray, (84, 84))
            segment = cv2.resize(segment, (84, 84))
            segment = cv2.threshold(segment, 0.1, 1., cv2.THRESH_BINARY)[1]

            batch_features[i] = [gray]
            batch_segment[i] = [segment]
        yield batch_features, batch_segment
Example #14
def get_data_stats():
    # Note: on Python 3, reduce must be imported from functools
    stats = {}

    affixGroups = read_json('affix-groups')
    stats['groups'] = {'items': len(affixGroups)}

    affixSynonyms = read_json('affix-synonyms')
    stats['synonyms'] = {'items': len(affixSynonyms)}

    cannith = read_json('cannith')
    stats['cannith'] = {
        'bonus types': len(cannith['bonusTypes']),
        'item types': len(cannith['itemTypes']),
        'progressions': len(cannith['progression']),
    }

    crafting = read_json('crafting')
    stats['crafting'] = {
        'systems': len(crafting),
        'items': reduce(lambda x, v: x + len(v), crafting.values(), 0)
    }

    quests = read_json('quests')
    stats['quests'] = {'raids': len(quests['raids'])}

    sets = read_json('sets')
    stats['sets'] = {
        'count': len(sets),
        'affixes': reduce(
            lambda x, v: x + reduce(
                lambda x2, v2: x2 + len(v2['affixes']), v, 0),
            sets.values(), 0)
    }

    items = read_json('items')
    stats['items'] = {
        'items': len(items),
        'affixes': reduce(lambda x, v: x + len(v['affixes']), items, 0)
    }

    return stats
Example #15
    def __init__(self, *args, **kwargs):
        try:
            # Read the logging configuration file
            logging.config.fileConfig(COMMON_PATH + const.RYU_LOG_CONF,
                                      disable_existing_loggers=False)
            self.logger = logging.getLogger(__name__)
            self.logger.debug("")

            super(mld_controller, self).__init__(*args, **kwargs)

            # Get the DPSet from the context
            self.dpset = kwargs["dpset"]

            # Apply a monkey patch to the system module sockets
            patcher.monkey_patch()

            # Load configuration
            config = read_json(COMMON_PATH + const.CONF_FILE)
            self.logger.info("%s:%s", const.CONF_FILE,
                             json.dumps(config.data, indent=4,
                                        sort_keys=True, ensure_ascii=False))
            self.config = config.data[const.SETTING]

            # Set the loop flag
            self.loop_flg = True

            # Get the ZMQ connection strings
            zmq_conn = self.get_zmq_connect(config)
            self.zmq_pub = zmq_conn[0]
            self.zmq_sub = zmq_conn[1]

            # Create ZMQ send/receive sockets
            self.create_socket(self.zmq_pub, self.zmq_sub)

            # Start the thread that receives from mld
            hub.spawn(self.receive_from_mld)

        except Exception:
            self.logger.error("%s", traceback.format_exc())
Example #16
class TestApi:
    # Most basic usage
    # @pytest.mark.parametrize('args', ["美团", "腾讯", "阿里", "百度"])  # the test runs once per parameter value
    # def test_01_api(self,args):
    #     print(args)

    # Unpacking usage (as with @unpack, the data-driven decorator from the ddt framework for unittest)
    # @pytest.mark.parametrize('args',read_yaml() )
    # def test_01_api(self,args):
    #     '''API for fetching web news'''
    #     url = args['api_request']['url']
    #     method = args['api_request']['method']
    #     headers = args['api_request']['headers']
    #     params = args['api_request']['params']
    #     validate = args["api_validate"]
    #
    #     if method == 'get':
    #         requests.get()
    #     else:
    #         response=requests.post(url,json=params,headers=headers)
    #         for val in validate:
    #            assert val['eq']['code']==response.json()['code']

    @pytest.mark.parametrize('args', read_json())
    def test_01_api(self, args):
        '''API for fetching web news'''
        url = args['api_request']['url']
        method = args['api_request']['method']
        headers = args['api_request']['headers']
        params = args['api_request']['params']
        eq = args["api_validate"]["eq"]

        if method == 'get':
            response = requests.get(url, params=params, headers=headers)
        else:
            response = requests.post(url, json=params, headers=headers)
        # assert code == response.json()["code"]
        # print(response.json())
        cmp_dict(eq, response.json())
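Example #17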
elmethod = "composition"
sigma = 12
lamda = 0.0001
kernel = "gaussian"

MAEtrain = []
MAEcross = []
el1 = sys.argv[1]
el2 = sys.argv[2]

maxrun = 5
avgEpredict = np.zeros(11)
for irun in range(maxrun):
    write_json(el1, el2)
    mset = read_json("include_ML_natoms_30/data.json", energytype="formation")
    #    mset = read_json("tests/data.json", energytype="formation")
    mcross, mtrain = get_testset(mset)
    mcross = read_json("%s%s.json" % (el1, el2), energytype="formation")
    #mtest, mset = get_testset(mset)
    #mtrain, mcross, mset = get_train_validation_set(mset)
    elmap = get_elements_map(mset)

    result = krr_regression(mtrain,
                            mcross,
                            sigma,
                            lamda,
                            kernel=kernel,
                            elmap=elmap,
                            elmethod=elmethod,
                            loadalpha=False,
                            alphanum=irun)
Example #18
                                       binary=False)
elif config.model_name == 'TfIdf':
    vectorizer = TfidfVectorizer(max_df=0.5, max_features=opts.n_features,
                                 min_df=1, stop_words='english',
                                 use_idf=True)
elif config.model_name == 'Counter':
    vectorizer = CountVectorizer()


    #vectorizer = CountVectorizer(tokenizer=lambda text: text.split())
    #vectorizer = TfidfVectorizer(tokenizer=lambda text: text.split())

data_subtitle = []
vid_list = []
limits = None
read_json.read_json(sys.argv[1], data_subtitle, sys.argv[2], vid_list, limits)
print(len(data_subtitle))
X = vectorizer.fit_transform(data_subtitle)

print("done in %fs" % (time() - t0))
print("n_samples: %d, n_features: %d" % X.shape)
print()

#opts.n_components=50

if opts.n_components:
    print("Performing dimensionality reduction using LSA")
    t0 = time()
    # Vectorizer results are normalized, which makes KMeans behave as
    # spherical k-means for better results. Since LSA/SVD results are
    # not normalized, we have to redo the normalization.
Example #19
import numpy as np
from read_json import read_json, get_elements_map
from split_dataset import *
from ml import *
from pylab import *
np.random.seed(0)

mset = read_json("larger_data/data.json", energytype="formation")
mcross, mtrain = get_testset(mset)
mcross = read_json("CoSi.json", energytype="formation")
elmap = get_elements_map(mset)
Xtrain, Xcross, Etrain, Ecross = get_X(mtrain,
                                       mcross,
                                       elmap=elmap,
                                       elmethod="composition")

alpha = pickle.load(open('alpha.pkl', 'rb'))  # binary mode for pickle

sigma = 12
print(mcross[0].formula)
for i in range(len(Ecross)):
    Eest = 0  # estimation for set number i
    Econ = []
    ECo = 0
    for j in range(len(Etrain)):
        d = Xtrain[j] - Xcross[i]
        dd = np.sqrt(np.inner(d, d))
        Etmp = alpha[j] * get_kernel(dd, sigma, kernel="gaussian")
        Eest += Etmp
        Econ.append(np.abs(Etmp))
        if "Co" in mtrain[j].formula:
            ECo += Etmp
    print(Eest, ECo)
def write_csv(strtype='general'):
    if strtype == "general":
        mset = read_json(filename = "data.json")
        f = open('general.csv', 'w')
        print >> f, "(E_DFT, mean.mass, mean.elecneg, mean.radius, mean.ionization"
        for atoms in mset:
            elecneg = []
            rad = []
            Eionization = []
            for name in atoms.names:
                elecneg.append(pauling[name])
                rad.append(radius[name])
                Eionization.append(Eion[name])
            sum_elecneg = np.sum(elecneg) / len(elecneg)
            std_elecneg = np.std(elecneg) 
            sum_mass = np.sum(atoms.masses) / len(atoms.masses)
            std_mass = np.std(atoms.masses)
            sum_radius = np.sum(rad) / len(rad)
            std_radius = np.std(rad)
            sum_Eionization = np.mean(Eionization)
            print >>f, "%6.2f, %6.2f, %6.2f, %6.2f, %6.2f"%(atoms.Eref, sum_mass, 
                                                            sum_elecneg, 
                                                            sum_radius, sum_Eionization)
                                                                                    
    elif strtype == "RS":
        mset = read_json(filename = "data_RS.json")
        f = open('RS.csv', 'w')
        print >> f, "(name0, name1, el, sum.mass, dmass, sum.elecneg, delecneg, calcvol, sum.radius, draius, dpos, Emadelung, Ecoh"
        for atoms in mset:
            elecneg1 = pauling[atoms.names[0]]
            elecneg2 = pauling[atoms.names[1]]
            volscaled = atoms.calcvol**(1./3.) / (radius[atoms.names[0]] * radius[atoms.names[1]]) * 10**4
            delecneg = np.abs(elecneg1-elecneg2)
            sqrtneg = np.std([elecneg1, elecneg2]) #np.sqrt(elecneg1*elecneg2)
    #        Elatt = Emadelung["%s"%(atoms.icsdno)]
            for el in atoms.names:
                if el in charge.keys():
                    Elatt = Emadelung["%s"%(atoms.icsdno)] / charge[el]**2
                    break
            
            dmass = np.abs(atoms.masses[0] - atoms.masses[1])
            d = atoms.positions[0] - atoms.positions[1]
            dpos = np.sqrt(np.inner(d, d))
    
            print >>f, "%s, %s, %s, %6.2f, %6.2f, %6.2f, %6.2f, %6.2f, %6.2f, %6.2f, %6.2f, %6.2f, %6.2f"%(atoms.names[0], atoms.names[1], el, atoms.masses[0] + atoms.masses[1], np.abs(atoms.masses[0] - atoms.masses[1]), (elecneg1 + elecneg2), delecneg, atoms.calcvol, radius[atoms.names[0]] + radius[atoms.names[1]], np.abs(radius[atoms.names[0]] - radius[atoms.names[1]]), dpos, Elatt, atoms.Eref)


    elif strtype == "exptgap":
        mset = read_json(filename = "exptgap.json")
        f = open('exptgap.csv', 'w')
        print >> f, "(formula, std.mass, sum.mass, diff.mass, std.elecneg, sum.elecneg, diff.elecneg, std.radius, sum.radius, diff.radius, bandgap"
        for atoms in mset:
            elecneg = []
            rad = []
            for name in atoms.names:
                elecneg.append(pauling[name])
                rad.append(radius[name])
            sum_elecneg = np.sum(elecneg) / len(elecneg)
            std_elecneg = np.std(elecneg) 
            diff_elecneg = np.max(elecneg) - np.min(elecneg)
            sum_mass = np.sum(atoms.masses) / len(atoms.masses)
            std_mass = np.std(atoms.masses)
            diff_mass = np.max(atoms.masses) - np.min(atoms.masses)
            sum_radius = np.sum(rad) / len(rad)
            std_radius = np.std(rad)
            diff_radius = np.max(rad) - np.min(rad)
            print >>f, "%s,  %6.2f, %6.2f, %6.2f, %6.2f, %6.2f, %6.2f, %6.2f, %6.2f, %6.2f, %6.2f"%(atoms.formula, std_mass, sum_mass, diff_mass, 
                                                                                                    std_elecneg, sum_elecneg, diff_elecneg,
                                                                                                    std_radius, sum_radius, diff_radius, atoms.Eref)
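Example #21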
    data.append({"formula": "%s 1"%(el2), "FERE": 0})
    json.dump(data, open("%s%s.json"%(el1, el2), 'w'))

elmethod = "composition"
sigma = 12
lamda = 0.0001
kernel = "gaussian"

MAEtrain = []
MAEcross = []
el1 = sys.argv[1]
el2 = sys.argv[2]

maxrun = 5
avgEpredict = np.zeros(11)
for irun in range(maxrun):
    write_json(el1, el2)
    mset = read_json("include_ML_natoms_30/data.json", energytype="formation")
#    mset = read_json("tests/data.json", energytype="formation")
    mcross, mtrain = get_testset(mset)
    mcross = read_json("%s%s.json"%(el1, el2), energytype="formation")
    #mtest, mset = get_testset(mset)
    #mtrain, mcross, mset = get_train_validation_set(mset)
    elmap = get_elements_map(mset)

    result = krr_regression(mtrain, mcross, sigma, lamda, kernel=kernel, elmap=elmap, elmethod=elmethod,
                            loadalpha=False, alphanum=irun)
    MAEtrain.append(result[0])
    MAEcross.append(result[1])
    Epredict = result[2]
    print(result[0], result[1])

    print "element formation energy :", Epredict[0], Epredict[-1]
import numpy as np
from read_json import read_json, get_elements_map
from split_dataset import *
from ml import *
from pylab import *
np.random.seed(0)

mset = read_json("larger_data/data.json", energytype="formation")
mcross, mtrain = get_testset(mset)
mcross = read_json("CoSi.json", energytype="formation")
elmap = get_elements_map(mset)
Xtrain, Xcross, Etrain, Ecross = get_X(mtrain, mcross, elmap=elmap, elmethod="composition")

alpha = pickle.load(open('alpha.pkl', 'rb'))  # binary mode for pickle

sigma = 12
print(mcross[0].formula)
for i in range(len(Ecross)):
    Eest = 0 # estimation for set number i
    Econ = []
    ECo = 0
    for j in range(len(Etrain)):
        d = Xtrain[j] - Xcross[i]
        dd = np.sqrt(np.inner(d, d))
        Etmp = alpha[j] * get_kernel(dd, sigma, kernel="gaussian")
        Eest += Etmp
        Econ.append(np.abs(Etmp))
        if "Co" in mtrain[j].formula:
            ECo += Etmp
    print(Eest, ECo)
Example #23
    def segmentate(self, data):
        for points in data:
            x = points.get('x')
            y = points.get('y')
            z = points.get('z')
            if self.isWithinMinimalDistance(x, y, z):
                self.segmentatedPoints.append(points)
        return self.segmentatedPoints

    def isWithinMinimalDistance(self, x, y, z):
        return (self.minimalHeight < z < self.maximumHeight
                and -self.radius < x < self.radius
                and -self.radius < y < self.radius)


# for testing purposes only
from read_json import read_json
import json as json
pointcloud = read_json()
seg = segmentator(11.1111111, 4, -0.9, 1.5)
points = seg.segmentate(pointcloud)

file = open("segmentation.txt", "a")
for point in points:
    file.write(
        str(point.get('x')) + " " + str(point.get('y')) + " " +
        str(point.get('z')) + "\n")
file.close()
Example #24
###################################################################################

    # flag = int(input("If you have the transform matrix? \nYes: press '1', No:press '2'\n"))
    # if flag == 2:
    #     cal = Calibrater(image, img_size=size[-2::-1], width=tennis_width, height=tennis_height, data_path=None)
    #     mat = cal._get_trans_matrix()
    #     numpy.savetxt('mat_demo.csv', mat, delimiter = ',')

###################################################################################

    my_matrix = numpy.loadtxt(open("mat_demo.csv", "rb"),
                              delimiter=",", skiprows=0)

###################################################################################
    target_json = "/alphapose/alphapose-results.json"
    path = origin + target_json
    test = read_json.read_json(path)
###################################################################################
    video_path = origin + '/alphapose/badminton.mp4'  # os.path.join would discard origin given a leading '/'
    device = cv2.VideoCapture(video_path)
    size = (int(device.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(device.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    success, frame = device.read()
###################################################################################
    success = True
    frame_id = 0
    while success:
        success, frame = device.read()
        img_final_list = find_athlete(test[frame_id], my_matrix, tennis_width, tennis_height)
        # frame_queue.put(frame)
        frame_id = frame_id + 1
        
Example #25
def topapps():
    return [app["package_name"] for app in read_json("topgooglecharts.json")["app_list"]]
Example #26
def main_loop(year, these_awards):
    tweets = read_json.read_json(year)
    ignore_as_first_char = ('RT', '@', '#')
    # remove_as_first_char = ('@', '#')
    tweet_counter = 0

    list_actors()
    list_movies()

    global fashion_list

    fashion_list = {}

    for award in these_awards:
        if any(people_word in award for people_word in ("perform", "direct", "cecil")):
            people_awards.append(award)
        real_awards.append(award)

    for line in tweets:
        if tweet_counter % 10000 == 0:
            print(tweet_counter)

        if tweet_counter == len(tweets) - 1:
            nominees, winners, presenters, hosts, awards, fashion = wrapup()
            end_time = time.time()
            # print(nominees, winners, presenters)
            # print("NOMINEES:", nominees)
            # print("WINNERS:", winners)
            # print("PRESENTERS:", presenters)
            # print("MENTIONS:", mentions)
            # print("RUNTIME:", end_time - start_time, "seconds. (", (end_time - start_time) / 60, "minutes.)")
            return nominees, winners, presenters, hosts, awards, fashion

        lower_text = line['text'].lower()

        if not any(cont_word in lower_text for cont_word in (pass_words + fashion_dict)):
            tweet_counter += 1
            continue

        monologue = "monologue" in lower_text
        congrats_found = "ongrat" in lower_text

        fashion = any(cont_word in lower_text for cont_word in fashion_dict)

        cleaned = []
        for word in line['text'].split():
            if not word.startswith(ignore_as_first_char):
                # if word.startswith(remove_as_first_char) and len(word) > 1:
                #     word = word[1:]
                # print(word)
                cleaned.append(word)

        tagged = nltk.tag.pos_tag(cleaned)

        lower_tagged = [(item[0].lower(), item[1]) for item in tagged]
        # print(clean_parsed)


        # for i in lower_tagged:
        #     if "ongrat" in i[0]:
        #         congrats_found = True

        # print(lower_tagged)

        # now, match proper nouns to verbs
        length = len(lower_tagged)
        counter = 0
        while counter < length:
            # find every group of words labeled NNP

            if lower_tagged[counter][1] == 'NNP' and "ongrat" not in lower_tagged[counter][0]:
                potential_item, noun_len = full_nnp(lower_tagged[counter: length])
                counter += noun_len

                if monologue:
                    mon_item = can_combine_item_set(potential_item, mentions)
                    try:
                        mentions[mon_item] += 1
                    except KeyError:
                        mentions[mon_item] = 1
                    continue

                if fashion:
                    fashion_list.setdefault(potential_item, (0, 1, []))
                    fashion_list[potential_item][2].append(line['text'])
                    blank = fashion_list[potential_item][2]
                    fashion_list[potential_item] = (0,
                                                    fashion_list[potential_item][1] + 1,
                                                    blank)
                    continue

                # find the next verb for each NNP group
                next_verb, verb_ind = find_next_verb(lower_tagged[counter: length])
                if next_verb != "":
                    if not any(word in next_verb for word in all_verbs):
                        counter += 1
                        break
                    new_counter = counter + verb_ind + 1

                    if any(lower_tagged[new_counter - 2][0] == negate_word
                           for negate_word in ("didn't", "didnt", "not")):
                        negated = True
                    else:
                        negated = False

                    # find the next group of nouns starting with 'best'
                    badaward = find_next_award_maria(lower_tagged, new_counter)
                    newbadaward = ""

                    # add award
                    for word in badaward:
                        newbadaward = newbadaward + word + " "
                    try:
                        badawardnames[newbadaward] += 1
                    except KeyError:
                        badawardnames[newbadaward] = 1

                    # find the associated award name
                    award = find_next_award_hardcoded(lower_tagged, new_counter)
                    if award:
                        update_master(award, potential_item, next_verb, negated)
                        if congrats_found:
                            update_master(award, potential_item, next_verb, True)
            else:
                counter += 1

        tweet_counter += 1
Example #27
import re
from pprint import pprint
from write_json import write_json
from read_json import read_json

if __name__ == "__main__":
    img_name_pattern = '[a-zA-Z0-9\\-_]+\\|,(png|PNG|svg|SVG|jpeg|jpg|JPEG|JPG|gif|GIF)'
    path = "./before/test5.json"
    img_url_counts = read_json(path)
    new_url_obj = {}
    for img_url in img_url_counts.keys():
        res = re.search(img_name_pattern, img_url)
        if res:  # skip URLs that do not match the pattern instead of crashing
            new_url_obj[res.group(0)] = img_url_counts[img_url]
    write_json(new_url_obj, path)
Example #28
    def __init__(self):
        try:
            # Logger setup
            self.logger = logging.getLogger(__name__)
            self.logger.debug("")

            # Load configuration
            config = read_json(COMMON_PATH + const.CONF_FILE)
            self.logger.info("%s:%s", const.CONF_FILE, json.dumps(
                config.data, indent=4, sort_keys=True, ensure_ascii=False))
            self.config = config.data[const.SETTING]

            # Get interface information
            self.ifinfo = {}
            self.ifinfo = self.get_interface_info(
                self.config[const.MLD_ESW_IFNAME])

            # Set the QQIC for queries
            self.QQIC = self.calculate_qqic(
                self.config[const.REGURALY_QUERY_INTERVAL])

            # Initialize viewing (channel) information
            self.ch_info = channel_info(self.config)

            # Load switch information
            switches = read_json(COMMON_PATH + const.SWITCH_INFO)
            self.logger.info("%s:%s", const.SWITCH_INFO, json.dumps(
                switches.data, indent=4, sort_keys=True, ensure_ascii=False))
            self.switch_mld_info = switches.data[const.SW_TAG_MLD_INFO]
            self.switch_mc_info = switches.data[const.SW_TAG_MC_INFO]
            self.switches = switches.data[const.SW_TAG_SWITCHES]
            for switch in self.switches:
                if switch[const.SW_TAG_NAME] == const.SW_NAME_ESW:
                    self.edge_switch = switch
                    break

            # Load multicast information
            mc_info = read_json(COMMON_PATH + const.MULTICAST_INFO)
            self.logger.info("%s:%s", const.MULTICAST_INFO, json.dumps(
                mc_info.data, indent=4, sort_keys=True, ensure_ascii=False))
            self.mc_info_list = mc_info.data[const.MC_TAG_MC_INFO]
            self.mc_info_dict = {}
            for mc_info in self.mc_info_list:
                self.mc_info_dict[
                    mc_info[const.MC_TAG_MC_ADDR],
                    mc_info[const.MC_TAG_SERV_IP]] = mc_info

            # Load bvid patterns
            bvid_variation = read_json(COMMON_PATH + const.BVID_VARIATION)
            self.logger.info("%s:%s", const.BVID_VARIATION, json.dumps(
                bvid_variation.data, indent=4, sort_keys=True,
                ensure_ascii=False))
            bvid_variations = bvid_variation.data[const.BV_TAG_BV_INFO]
            self.bvid_variation = {}
            for bvid_variation in bvid_variations:
                # ":"で区切られたkeyを昇順にソートして再設定
                bvid_key = const.DELIMIT_COLON.join(sorted(bvid_variation[
                    const.BV_TAG_KEY].split(const.DELIMIT_COLON)))
                self.bvid_variation[bvid_key] = \
                    bvid_variation[const.BV_TAG_BVID]

            # Get the ZMQ connection strings
            zmq_conn = self.get_zmq_connect(config)
            self.zmq_pub = zmq_conn[0]
            self.zmq_sub = zmq_conn[1]

            # Create ZMQ send/receive sockets
            self.create_socket(self.zmq_pub, self.zmq_sub)
            # Instance for generating flow mods
            self.flowmod_gen = flow_mod_generator(self.switches)

        except Exception:
            self.logger.error("%s", traceback.format_exc())
            self.end_process()
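Example #29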
from bs4 import BeautifulSoup
import requests
import os
import re
import json
import collections
from read_json import read_json

items = read_json('items')
affixes = set()

#print(json.dumps(data, indent=4, sort_keys=True))
for item in items:
    for affix in item['affixes']:
        affixes.add(affix['name'])

for affix in sorted(affixes, key=str.casefold):
    print(affix)
Example #30
        "forest": 0.186148071251,
        "svr": 0.135622957115, }, 
    "coulomb_ZiZj/d" : {
        "knn": 0.296029527716, 
        "krr":  0.20612285173126219, 
        "forest": 0.206198925628, 
        "svr": 0.245838741856, }, 
    "coulomb_1/d" : {
        "knn": 0.320368738508, 
        "krr": 0.21328211175452691, 
        "forest": 0.20198798341, 
        "svr": 0.259278560667, }, 
}


mset = read_json("data.json")
mtest, mset = get_testset(mset)
mtrain, mcross, mset = get_train_validation_set(mset)
elmap = get_elements_map(mset)

results = {}
for elmethod in (None, "composition", #"constant", 
                 #"coordination", "inverse_cord", 
                 "coulomb_ZiZj/d", 
                 "coulomb_1/d",):
    results[elmethod] = {}
    results[elmethod]["knn"] = knn_regression(mtrain, mcross, 5, elmap=elmap, elmethod=elmethod)
    results[elmethod]["krr"] = krr_regression(mtrain, mcross, 50, 0.01, elmap=elmap, elmethod=elmethod)[1]
    results[elmethod]["forest"] = sklearn_regression(mtrain, mcross, "forest", elmap=elmap, elmethod=elmethod)  
    results[elmethod]["svr"] = sklearn_regression(mtrain, mcross, "svr", elmap=elmap, elmethod=elmethod)  
Example #31
def plot_error_in_volume(mset):
    volerror = []
    for atoms in mset:
        if atoms.exptvol is not None:
            volerror.append((atoms.calcvol - atoms.exptvol) / atoms.exptvol * 100)
            if np.abs(volerror[-1]) > 50:
                print(atoms.formula, atoms.icsdno, atoms.exptvol,
                      atoms.calcvol, volerror[-1])

    plot_all(volerror, "Error in volume (%) : (Vcalc - Vexpt) / Vexpt * 100")


def plot_dict(a):
    X = np.arange(len(a))
    bar(X, a.values(), align="center", width=0.5)
    xticks(X, a.keys())
    ymax = max(a.values()) + 20
    ylim(0, ymax)
    show()

if __name__ == "__main__":
    from read_json import read_json
    import sys
    mset = read_json(sys.argv[1], energytype="formation")
#    mset = read_json("include_ML_natoms_10/data.json")
    plot_error_in_volume(mset)
    plot_Eref(mset)
    plot_natoms(mset)
    plot_dict(get_unique_elements(mset))
    plot_dict(get_nelements_per_cell(mset))