Example #1
    def step(self, states, actions, rewards, next_states, dones,
             num_current_episode):
        """Save experience in replay memory, and use random sample from buffer to learn"""

        #self.memory.add(states, actions, rewards, next_states, dones)
        self.memory.add(encode(states), encode(actions), rewards,
                        encode(next_states), dones)

        # If enough samples in the replay memory and if it is time to update
        if (len(self.memory) > BATCH_SIZE) and (num_current_episode %
                                                UPDATE_EVERY == 0):

            # Note: this code only expects 2 agents
            assert (len(self.agents) == 2)

            # Allow learning several times in a row within the same episode
            for i in range(MULTIPLE_LEARN_PER_UPDATE):
                # Sample a batch of experience from the replay buffer
                experiences = self.memory.sample()
                # Update Agent #0
                self.maddpg_learn(experiences, own_idx=0, other_idx=1)
                # Sample another batch of experience from the replay buffer
                experiences = self.memory.sample()
                # Update Agent #1
                self.maddpg_learn(experiences, own_idx=1, other_idx=0)
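A minimal sketch of how this step method might be driven from a training loop. The multi_agent wrapper, the gym-style env API, and the episode count are assumptions, not part of the original snippet:

# Hypothetical driver loop; `multi_agent` and the gym-style `env` are assumed.
for episode in range(1000):
    states = env.reset()
    dones = [False, False]
    while not any(dones):
        actions = multi_agent.act(states)
        next_states, rewards, dones, _ = env.step(actions)
        multi_agent.step(states, actions, rewards, next_states, dones,
                         num_current_episode=episode)
        states = next_states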
Example #2
    def search_game(self, title):
        """Return a list of Game objects for a query for the given title."""
        all_games = []
        encoded_title = helper.encode(title)
        # full game search url
        url = helper.gamefaqsURL_search_game + encoded_title
        soup = helper.get_bs4(url)
        tags = soup.find_all("div", {"class": "pod"})
        for tag in tags:
            criteria_tag = tag.find("h2")
            # only include games under the best-matches category
            if criteria_tag and criteria_tag.text in search_matches:
                best_matches = tag.find_all("tr")
                for match in best_matches:
                    # get game platform
                    p = match.find("td", {"class": "rmain"}).text
                    platform = p.replace(" ", "").replace("\r\n", "")
                    info = match.find("td", {"class": "rtitle"})
                    # get game url
                    game_url = helper.gamefaqsURL_base + info.contents[1]["href"]
                    # get game title
                    game_title = info.text.replace("\n", "")
                    obj = game.Game(game_url, dict(title=game_title,
                                                   platform=platform))
                    all_games.append(obj)
                break
        return all_games
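A minimal usage sketch; the method clearly belongs to some API client object, so the GameFAQs class name below is a hypothetical stand-in:

# Hypothetical client; the class name is an assumption.
api = GameFAQs()
for g in api.search_game("chrono trigger"):
    print(g)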
Example #3
    def search_company(self, name, limit=40):
        """Return a list of Company objects for a query for the given name.

        The maximum number of companies to be returned is set by limit.
        """
        companies = []
        encoded_name = helper.encode(name)
        # full company search url
        url = helper.gamefaqsURL_search_company + encoded_name
        soup = helper.get_bs4(url)
        company_tags = soup.find_all("table")[1].find_all("a")
        count = 0
        for tag in company_tags:
            companies.append(company.Company(helper.gamefaqsURL_base + tag["href"],
                                             dict(name=tag.text)))
            count += 1
            if count >= limit:
                break
        return companies
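The same hypothetical client can drive the company search; the limit argument caps the result list as documented above:

# Hypothetical client, as in the previous sketch.
for c in api.search_company("square", limit=5):
    print(c)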
Example #4
def load_files(cases_text_directory, output_dir, output_file):
    # load NLTK's Punkt sentence tokenizer (nltk.download only fetches data;
    # nltk.data.load returns the tokenizer object)
    tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
    path = cases_text_directory + "/"
    filenames = os.listdir(path)
    count = 0
    arr_json_file = []
    arr_metadata = {}
    json_output_file = {}
    for filename in filenames:
        outputarray = {}
        if '.pdf.txt' in filename:
            count = count + 1
            # read the file line by line, stripping newlines
            with open(path + filename) as fp:
                lines = [line.rstrip('\n') for line in fp]
            text = ' '.join(lines)
            # replace non-ASCII characters with spaces
            text = ''.join([i if ord(i) < 128 else ' ' for i in text])
            text = helper.encode(text.replace("  ", " "))
            text = text.replace(
                "Williams, Brooke 6/26/2015 For Educational Use Only", "")
            outputarray["body"] = text
            outputarray["filename"] = filename
            outputarray["entities"] = []
            outputarray["title"] = ""
            outputarray["file_count"] = count
            arr_json_file.append(outputarray)

    arr_metadata["@count_at_loading"] = str(count)
    arr_metadata["@time_date"] = str(time.strftime('%X %x %Z'))
    arr_metadata["@input_directory"] = str(cases_text_directory)
    json_output_file["@data"] = arr_json_file
    json_output_file["@metadata"] = arr_metadata

    with open(output_dir + '/' + output_file, 'w') as datafile:
        json.dump(json_output_file,
                  datafile,
                  indent=4,
                  sort_keys=True,
                  separators=(',', ':'))
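A minimal invocation sketch; the directory and file names below are placeholders, not paths from the original project:

# Hypothetical paths; the function scans the input directory for '.pdf.txt' files.
load_files('cases_text', 'output', 'cases_loaded.json')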
Example #5
number_of_feat = 0
scores = -999  # sentinel: no best score recorded yet
epochs = 1
runs = 1500
start = datetime.datetime.now()

for i in range(runs):
    clf = DecisionTreeRegressor()
    num_feat = random.randint(1, 76)

    features, train, test, y_train, y_test = helper.split_train_test(
        training_set,
        training_target,
        num_feat,
        i,
    )
    train_encoded, test_encoded = helper.encode(train, test)
    helper.train_model(clf, train_encoded, y_train, epochs)

    # sanity-check predictions on a few training rows (result unused)
    y_predicted = clf.predict(train_encoded[0:10])

    test_score = helper.rsme_eval(clf, y_test, test_encoded)

    if test_score < scores or scores == -999:
        bfeatures = features
        scores = test_score
        seed = i
        number_of_feat = num_feat
    # rough ETA: average seconds per run times the number of remaining runs
    approx = ((((datetime.datetime.now() - start).seconds) /
               (i + 1)) * runs) - (datetime.datetime.now() - start).seconds
    print("time left: %s | %s/%s | best score: %s" %
          (str(datetime.timedelta(seconds=approx)), i + 1, runs, scores))
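Once the loop finishes, number_of_feat and seed record the best configuration, so the winning split can be reproduced with the same helper call used inside the loop. A minimal follow-up sketch:

# Reproduce the best split found by the random search above.
features, train, test, y_train, y_test = helper.split_train_test(
    training_set, training_target, number_of_feat, seed)
print("best features:", bfeatures)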
Example #6
def named_entities(START_VALUE, END_VALUE, in_dir, in_file):

    properties_json = json.load(open("properties.json"))
    # NLTK's built-in Punkt sentence tokenizer
    tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
    # Stanford NER tagger, configured in properties.json, e.g.:
    # "stanford_classifer": "stanford-ner/english.muc.7class.distsim.crf.ser.gz",
    # "stanford_jar": "stanford-ner/stanford-ner.jar"
    classifier = properties_json["stanford_classifer"]
    jar = properties_json["stanford_jar"]
    st = StanfordNERTagger(classifier, jar)

    with open(in_dir + '/' + in_file, "r") as fp:
        SUPER_JSON = json.load(fp)

    # add sentences
    counter = 0
    for items in SUPER_JSON["@data"]:
        counter = counter + 1
        items["counter"] = counter
        text = items["title"] + ". " + items["body"]
        items["entities"] = []
        SENTENCES_TO_TAG = tokenizer.tokenize(text)
        items["text"] = text
        # original_body is no longer needed
        items["original_body"] = []
        items["sentences"] = SENTENCES_TO_TAG

    print "START:"
    print datetime.datetime.now().isoformat()
    entity_tags = {"PERSON","ORGANIZATION","LOCATION","DATE"}
    for i in xrange(len(SUPER_JSON["@data"])):
        if i >= START_VALUE and i < END_VALUE:
            # 20 seconds for 1 file
            #try:
                list_tagged_text = []
                jsondata =[]
                print i, SUPER_JSON["@data"][i]["counter"]
                for sentences in SUPER_JSON["@data"][i]["sentences"]:
                    tokenized_text = word_tokenize(helper.encode(sentences))
                    tagged_text = st.tag(tokenized_text)

                    list_tagged_text.append(tagged_text)
                    for j in xrange(len(tagged_text)):
                        for entity_tag in entity_tags:
                            if entity_tag in tagged_text[j][1]:
                                jsondictitem = {entity_tag:tagged_text[j][0]}
                                jsondata.append(jsondictitem)
                SUPER_JSON["@data"][i]["entities"]=jsondata

            #Use this for sentence entities
            #sentence_data = {"sentence":sentences,"entities":sentence_entities}
            #sentence_arr.append(sentence_data)

            #except:
            #    SUPER_JSON["@data"][i]["entities"]=[{"ERROR":"ERROR"}]
    print "END:"
    print datetime.datetime.now().isoformat()

    for i, items in enumerate(SUPER_JSON["@data"]):
        entitytagslist = get_entitytagslist(items["entities"])
        items["entities"] = get_final_entitytagslist(entitytagslist, items["entities"])

    # write the enriched JSON back to the same file
    SUPER_JSON["@metadata"]["@time_date_ner"] = str(time.strftime('%X %x %Z'))
    with open(in_dir + '/' + in_file, 'w') as datafile:
        json.dump(SUPER_JSON, datafile, indent=4, sort_keys=True,
                  separators=(',', ':'))
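A minimal invocation sketch, assuming the file was produced by load_files (Example #4), which creates the @data, title, body, and entities fields this function reads:

# Hypothetical call: tag entities for the first 100 documents.
named_entities(0, 100, 'output', 'cases_loaded.json')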
Example #7
    def add_answer(self, car, road):
        # store the encoded (car, road) pair under the current frame index
        ans = [encode(car), encode(road)]
        self.answer_key[self.frame_count] = ans
        self.frame_count += 1
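A minimal sketch, assuming add_answer lives on a recorder class initialized with an empty answer_key dict and frame_count = 0; the AnswerKey name and both arguments are hypothetical:

# Hypothetical recorder; frame indices advance automatically.
recorder = AnswerKey()
recorder.add_answer(car_position, road_position)  # stored under frame 0
recorder.add_answer(car_position, road_position)  # stored under frame 1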
Example #8
import helper
import sklearn
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split

training_set, training_target, test_set, test_ids = helper.get_data()

X_train, X_test, y_train, y_test = train_test_split(training_set,
                                                    training_target,
                                                    test_size=0.2,
                                                    random_state=1)
train_encoded, test_encoded = helper.encode(X_train, X_test)

parameters = {
    'criterion': ['mse', 'friedman_mse', 'mae'],
    'splitter': ['best', 'random'],
    'max_depth': [10, None],
    'max_features': ['auto', 'sqrt', 'log2', None],
    'presort': [True, False]
}

clf = GridSearchCV(DecisionTreeRegressor(), parameters)
clf.fit(train_encoded, y_train)
print('score', clf.score(train_encoded, y_train))
print(clf.best_params_)
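Since GridSearchCV refits the best estimator by default, the fitted search can score the held-out split directly; this follow-up uses standard scikit-learn API on the variables defined above:

# Evaluate the refit best estimator on the encoded test split.
print('test score', clf.score(test_encoded, y_test))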
Example #9
feature_set_1 = ['Neighborhood', '3SsnPorch', 'HeatingQC', 'LotArea', 'SaleType', 'MasVnrType', 'LotShape', 'OpenPorchSF', 'FullBath', 'BsmtFinSF1', 'MSZoning', 'TotalBsmtSF', 'HalfBath', 'GarageType', 'BsmtFinSF2', 'BsmtUnfSF', 'RoofMatl', 'MSSubClass', 'Condition1', 'MasVnrArea', 'Exterior1st', 'KitchenAbvGr', 'GarageYrBlt', 'Foundation', 'OverallCond', 'BsmtHalfBath', 'EnclosedPorch', 'BsmtFinType2', 'BldgType', 'FireplaceQu', 'LotFrontage', 'LandSlope', 'GarageFinish', 'BedroomAbvGr', 'YearBuilt', 'MiscVal', '1stFlrSF', 'OverallQual', 'MoSold']
feature_set_2 = ['OpenPorchSF', 'GarageYrBlt', '1stFlrSF', 'SaleType', 'LandContour', 'HouseStyle', 'BsmtFinType1', 'WoodDeckSF', 'LotShape', 'YrSold', 'BsmtFinSF1', 'BsmtQual', 'YearBuilt', 'BsmtCond', 'GarageArea', 'RoofStyle', 'GrLivArea', 'Electrical', 'TotalBsmtSF', 'BedroomAbvGr', 'KitchenQual', 'PavedDrive', 'BsmtExposure', 'Fireplaces', 'YearRemodAdd', 'Heating', 'BldgType', 'OverallCond', 'LotConfig', 'EnclosedPorch', 'Functional', 'OverallQual', 'ExterCond', 'LandSlope', 'GarageType', '2ndFlrSF', 'MiscVal', 'MSSubClass', 'BsmtHalfBath', 'LotArea', 'MSZoning', 'LotFrontage', 'Neighborhood', 'LowQualFinSF', 'Foundation', '3SsnPorch', 'FullBath', 'BsmtFinSF2', 'PoolArea', 'BsmtFinType2', 'RoofMatl', 'HalfBath', 'GarageCond', 'HeatingQC', 'GarageFinish', 'MasVnrArea', 'KitchenAbvGr', 'BsmtFullBath', 'GarageQual', 'Utilities', 'TotRmsAbvGrd']
feature_set_3 = ['ExterCond', 'BsmtUnfSF', 'LotArea', 'Condition1', 'Condition2', 'LotShape', 'BsmtHalfBath', 'CentralAir', 'GarageArea', 'Functional', 'WoodDeckSF', 'FullBath', 'BsmtFullBath', 'Neighborhood', 'EnclosedPorch', 'Electrical', 'GarageFinish', 'GarageType', 'Foundation', 'YearBuilt', 'BsmtFinSF1', 'BldgType', 'GarageYrBlt', 'MasVnrArea', 'Street', 'SaleCondition', '2ndFlrSF', 'LandContour', 'RoofStyle', 'MasVnrType', 'Fireplaces', 'YrSold', 'GarageCars', 'BsmtFinType1', 'OpenPorchSF', 'TotalBsmtSF', 'Exterior2nd', '1stFlrSF', 'OverallCond', 'HeatingQC', 'MoSold', 'Heating', 'KitchenQual', 'LotFrontage', 'RoofMatl', 'BedroomAbvGr', 'LandSlope', 'OverallQual', 'FireplaceQu', '3SsnPorch', 'SaleType', 'MiscVal', 'BsmtFinType2', 'ScreenPorch', 'Exterior1st', 'Utilities', 'GarageQual', 'Fence', 'BsmtCond', 'GarageCond', 'HalfBath', 'HouseStyle', 'KitchenAbvGr', 'ExterQual', 'YearRemodAdd', 'BsmtQual', 'TotRmsAbvGrd', 'LotConfig', 'GrLivArea', 'MSZoning', 'PoolArea', 'BsmtFinSF2', 'LowQualFinSF']

xgb_v1_1 = get_v1()
xgb_v1_2 = get_v1()
xgb_v1_3 = get_v1()
xgb_v2_1 = get_v2()
xgb_v2_2 = get_v2()
xgb_v2_3 = get_v2()

train1, test1, y_train1, y_test1 = helper.evaluate_split_train_test(training_set, training_target, feature_set_1)
train2, test2, y_train2, y_test2 = helper.evaluate_split_train_test(training_set, training_target, feature_set_2)
train3, test3, y_train3, y_test3 = helper.evaluate_split_train_test(training_set, training_target, feature_set_3)

train_encoded1, test_encoded1 = helper.encode(train1, test1)
train_encoded2, test_encoded2 = helper.encode(train2, test2)
train_encoded3, test_encoded3 = helper.encode(train3, test3)

helper.train_model(xgb_v1_1, train_encoded1, y_train1, 1)
helper.train_model(xgb_v1_2, train_encoded2, y_train2, 1)
helper.train_model(xgb_v1_3, train_encoded3, y_train3, 1)
helper.train_model(xgb_v2_1, train_encoded1, y_train1, 1)
helper.train_model(xgb_v2_2, train_encoded2, y_train2, 1)
helper.train_model(xgb_v2_3, train_encoded3, y_train3, 1)

rsme = []
rsme.append(helper.rsme_eval(xgb_v1_1, y_test1, test_encoded1))
rsme.append(helper.rsme_eval(xgb_v1_2, y_test2, test_encoded2))
rsme.append(helper.rsme_eval(xgb_v1_3, y_test3, test_encoded3))
rsme.append(helper.rsme_eval(xgb_v2_1, y_test1, test_encoded1))
rsme.append(helper.rsme_eval(xgb_v2_2, y_test2, test_encoded2))
rsme.append(helper.rsme_eval(xgb_v2_3, y_test3, test_encoded3))
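A short follow-up sketch: pick the lowest-RMSE pairing from the evaluations above (indices follow the append order):

# Select the best model/feature-set combination by RMSE.
best = min(range(len(rsme)), key=lambda k: rsme[k])
print('best configuration index:', best, 'rsme:', rsme[best])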
Example #10
    18,
    18,
    18,
    18,
]

DATA_PATH = '../nanopore_dna_storage_data/'
infile_name = DATA_PATH + 'encoded_file/data_files.tar.bz2.enc'
for i in range(13):
    print('i', i)
    print('bytes_per_oligo', bytes_per_oligo[i])
    print('RS_redundancy', RS_redundancy[i])
    print('conv_m', conv_m[i])
    print('conv_r', conv_r[i])
    print('pad', pad[i])
    helper.encode(data_file=infile_name,
                  oligo_file=DATA_PATH + 'oligo_files/reads.' + str(i),
                  bytes_per_oligo=bytes_per_oligo[i],
                  RS_redundancy=RS_redundancy[i],
                  conv_m=conv_m[i],
                  conv_r=conv_r[i],
                  pad=pad[i])
    with open(DATA_PATH + 'oligo_files/reads.' + str(i)) as f_reads, open(
            DATA_PATH + 'oligo_files/oligos_' + str(i) + '.fa',
            'w') as f_oligos:
        for j, line in enumerate(f_reads):
            f_oligos.write('>oligos_' + str(i) + '_' + barcode_start[i] + '_' +
                           barcode_end[i] + '_' + str(j) + '\n')
            f_oligos.write(barcode_start[i] + line.rstrip('\n') +
                           barcode_end[i] + '\n')
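For reference, each FASTA record written by the loop above has the following shape (values illustrative; the barcodes come from the barcode_start/barcode_end lists whose definitions are truncated in this snippet):

# >oligos_<i>_<barcode_start[i]>_<barcode_end[i]>_<j>
# <barcode_start[i] + encoded read + barcode_end[i]>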
Example #11
def DES(filename=None, key_file=None, action="encrypt"):
    # ***** ERROR CHECKING *****
    # check that required arguments are given
    if key_file is None and filename is None:
        print("Error: need to input filename and key")
        return None
    # check that the key file path is a string
    if not isinstance(key_file, str):
        print("Error: need to input key file")
        return None
    # check that key file (kf) exists
    try:
        kf = open(key_file, "rb")
    except IOError:
        print("Error: " + key_file + " does not exist")
        return None
    # check that filename is a string
    if not isinstance(filename, str):
        print("Error: First input must be filename (string)")
        return None
    # check that file exists
    try:
        f = open(filename, "rb")
    except IOError:
        print("Error: " + filename + " does not exist")
        return None

    # read the key file in full (the loop keeps reading until EOF)
    key = []
    with kf as x:
        temp = x.read()
        key.append(temp)
        while temp != b"":
            temp = x.read()
            key.append(temp)
    # strip a trailing newline byte (0x0A) from the key, if present
    theKey = key[0]
    if theKey[-1] == 10:
        theKey = theKey[:-1]
    # check correct key length
    if len(theKey) != 8:
        print(theKey)
        print("Error: key must be exactly 8 bytes")
        return None

    # ***** ENCRYPTION AND DECRYPTION *****
    b = []  # file contents, read two bytes at a time
    with f as x:
        temp = x.read(2)
        b.append(temp)
        while temp != b"":
            temp = x.read(2)
            b.append(temp)
    # *** ENCRYPT ***
    bts = []  # the transformed output
    if action == "encrypt":
        print("ENCRYPTING")
        # apply each of the 8 key bytes to every block in turn
        for i in range(0, 8):
            cnt = 0
            while b[cnt] != b"":
                b[cnt] = h.encode(b[cnt], theKey[i])
                if i == 7:
                    bts = bts + b[cnt]
                cnt = cnt + 1
    # *** DECRYPT ***
    else:
        print("DECRYPTING")
        # apply the key bytes in reverse order to undo encryption
        for i in range(0, 8):
            cnt = 0
            while b[cnt] != b"":
                b[cnt] = h.decode(b[cnt], theKey[7 - i])
                if i == 7:
                    bts = bts + b[cnt]
                cnt = cnt + 1
    return [bts, key]
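A minimal invocation sketch; the file names are placeholders, the key file must contain exactly 8 bytes, and h refers to the helper module the function already uses for encode/decode:

# Hypothetical call; paths are placeholders. Returns the transformed
# blocks and the raw key read from the key file.
encrypted, key = DES("message.txt", "key.txt", action="encrypt")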