def bestFirstSearch(self, startNode, goalNode, heuristic):
    # Greedy best-first search: the frontier is ordered purely by the heuristic value.
    bestFirstQueue = PriorityQueue(0)
    visitedNode = {}
    parent = {}
    utility = Utility()
    goalNodeStateDict = utility.getGoalNodeStateDict(goalNode)
    bestFirstQueue.put((1, startNode))
    while not bestFirstQueue.empty():
        poppedTuple = bestFirstQueue.get()
        self.nodesVisited += 1
        popped = poppedTuple[1]
        if popped == goalNode:
            return self.backtrack(popped)
        visitedNode[popped.stringRep] = popped
        if self.maxDepth < popped.level:
            self.maxDepth = popped.level
        popped.expand()
        for child in popped.children:
            if child.stringRep not in visitedNode:
                heuristicOfChild = utility.getHeuristic(child, goalNode, heuristic, goalNodeStateDict)
                # Note: if two children share a heuristic value, PriorityQueue will try to
                # compare the node objects themselves unless a tie-breaker is added.
                bestFirstQueue.put((heuristicOfChild, child))
                self.maxQueueSize += 1
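# The frontier above pushes (heuristicOfChild, child) tuples, so two children with equal
# heuristic values force PriorityQueue to compare the node objects, which raises TypeError
# in Python 3 unless the nodes define ordering. A minimal sketch of the usual fix, assuming
# nothing about BoardNode, is to add an insertion counter as a tie-breaker:
from itertools import count
from queue import PriorityQueue

tie_breaker = count()
frontier = PriorityQueue()
frontier.put((1, next(tie_breaker), {"state": "A"}))  # payloads are never compared...
frontier.put((1, next(tie_breaker), {"state": "B"}))  # ...because the counter breaks ties
print(frontier.get()[2])  # -> {'state': 'A'}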
def proof_of_work(a, last_block, data):
    func = inspect.currentframe().f_back.f_code
    logging.info("Starting proof of work")
    start = time.time()
    interval = 20
    now = time.time() + 1
    effort, pow_hash_object = Utility.genhash(last_block.index + 1, time.time(), data, last_block.hash)
    leading_zeroes = Utility.leadingzeroes(pow_hash_object.digest())
    # Keep hashing until strictly more than variables.WORK leading zeroes are found.
    while leading_zeroes <= variables.WORK:
        now = time.time() + 1
        # Roughly every `interval` seconds, drain the queue of messages from the node process.
        if int(now - start) % interval == 0:
            logging.debug("Checking for messages")
            messages = []
            while not a.empty():
                obj = a.get()
                logging.debug("Got {} from queue".format(obj))
                messages.append(obj)
            for message in messages:
                if message[0] == "ip":
                    logging.debug("That's an ip {} adding to peers".format(message[1]))
                    variables.PEER_NODES.append(str(message[1]))
                    continue
                logging.debug("not an IP, putting it back message:{}".format(message))
                a.put(message)
            start = time.time()
            # Check whether a longer chain appeared on the network while we were hashing.
            new_chain = consensus()
            if new_chain:
                logging.info("Received a consensus while doing POW")
                return False, new_chain
        effort, pow_hash_object = Utility.genhash(last_block.index + 1, now, data, last_block.hash)
        leading_zeroes = Utility.leadingzeroes(pow_hash_object.digest())
    retBlock = Block(last_block.index + 1, now, pow_hash_object.hexdigest(), effort, data, last_block.hash)
    logging.info("Farmed a block returning: {}".format(retBlock))
    return True, retBlock
def start():
    global node, blockchain
    genesis = Utility.create_genesis_block()
    blockchain = Blockchain(genesis.index, genesis.timemade, genesis.proof_of_work,
                            genesis.effort, genesis.transactions, genesis.previous_hash)
    node.config['SECRET_KEY'] = Utility.createHexdigest(User.password)
    node.run(host="0.0.0.0", port=variables.PORT)
def write_value(cls, io, value, master, device, step=1):
    try:
        address = io.address
        byte_order = device.byte_order
        # 32-bit types occupy two registers, so the address stride doubles.
        if 'int' in io.type or 'dword' in io.type or 'float' in io.type:
            (cmd, mb_addr) = cls().__transfer_write_addr(int(address) + (step * 2), io.type)
        else:
            (cmd, mb_addr) = cls().__transfer_write_addr(int(address) + step, io.type)
        if 'int' in io.type:
            value1 = int(value)
            values = Utility.toWords(value1, byte_order)
        elif 'dword' in io.type:
            value1 = int(value)
            if ("unsigned" in io.type and 0 <= value1 <= 4294967295) or \
                    ("unsigned" not in io.type and -2147483648 <= value1 <= 2147483647):
                values = Utility.toWords(value1, byte_order)
            else:
                raise ValueError("Invalid values: %s" % value1)
        elif io.type == 'float':
            value1 = float(value)
            values = Utility.Float2ieee754Words(value1, byte_order)
        elif 'word' in io.type:
            value1 = int(value)
            if ("unsigned" in io.type and 0 <= value1 <= 65535) or \
                    ("unsigned" not in io.type and -32768 <= value1 <= 32767):
                # Swap the two bytes of the word for the 'ba'/'dcba' byte orders.
                if re.search(r'(^ba($|dc$))|(^dcba$)', byte_order) is not None:
                    h = ((value1 & 0xff00) >> 8)
                    l = (value1 & 0xff)
                    value1 = ((l << 8) & 0xff00) | h
                if cmd == cst.WRITE_SINGLE_COIL:
                    values = value1
                else:
                    values = [value1, ]
            else:
                raise ValueError("Invalid values: %s" % value1)
        else:
            if cmd == cst.WRITE_SINGLE_COIL:
                values = value
            else:
                values = [value, ]
        logger.debug(
            "write ctrl [modbus] id: %s, cmd: %s, mb_addr: %s, value: %s"
            % (device.id, cmd, mb_addr, values))
        master.execute(device.machine_address, cmd, mb_addr, output_value=values)
    except Exception as e:
        logger.debug("Write error [modbus] @ %s : %s" % (io.mb_id, e))
def start(e):
    global node, blockchain, mining_process, event
    # With no known peers we bootstrap our own chain; otherwise we adopt the network's.
    if not len(Variables.PEER_NODES) > 0:
        genesis = Utility.create_genesis_block()
        blockchain = Blockchain(genesis)
    else:
        consensus()
    event = e
    mining_process = Process(target=Mining.mine, args=(event,))
    mining_process.start()
    logging.debug("Mining_classes Started")
    node.config['SECRET_KEY'] = Utility.createHexdigest(User.password)
    node.run(host="0.0.0.0", port=Variables.PORT)
def getFeatureRMSEAgainstBaseline(cols=None):
    # Avoid a mutable default argument: appending to a default list would leak
    # extra columns across successive calls.
    if cols is None:
        cols = ['color_exist']
    cols = list(cols)
    utility = Utility()
    utility.startTimeTrack()
    # This part skips the feature training and simply uses an existing feature set.
    print("len(cols):", len(cols), cols)
    print("Reading feature set")
    all_df = pd.read_csv('../data/features_doc2vec_sense2vec_pmi_20170418.csv')
    feature_train_df = all_df[:74067]
    # Must drop these columns for OrdinalRegression
    feature_train_df.drop('wm_product_brand', axis=1, inplace=True)
    cols.append('relevance_int')
    cols.append('id')
    cols.append('search_term')
    cols.append('product_uid')
    cols.append('relevance')
    cols.append('product_idx')
    cols.append('Word2VecQueryExpansion')
    print(cols)
    feature_train_df = feature_train_df.filter(items=cols, axis=1)
    feature_test_df = all_df[74067:]
    feature_test_df.drop('relevance', axis=1, inplace=True)
    utility.checkpointTimeTrack()

    print("#### Running: OrdinalRegression ordridge training ####")
    # dp=DataPreprocessing()
    print("feature_train_df:", list(feature_train_df))
    # trainDF,validateDF=dp.generateValidationSet(train_df)
    orModel = OrdinalRegressionRanker('ordridge')
    orModel.train(feature_train_df, None)
    # orModel.gridSearch(feature_train_df, None)
    print("#### Completed: OrdinalRegression ordridge training ####")
    utility.checkpointTimeTrack()
def mine(a):
    func = inspect.currentframe().f_back.f_code
    logging.info("Starting to mine")
    # See if other blockchains exist
    blockchain = consensus()
    if not blockchain:
        logging.info("Didn't receive a blockchain from anyone and need to make one")
        # We didn't find one, need to make one
        variables.BLOCKCHAIN.append(Utility.create_genesis_block())
    else:
        logging.info("Received a blockchain from the net")
        # We got blocks from someone, save them
        variables.BLOCKCHAIN = blockchain
    message = Utility.buildmessage("blockchain", variables.BLOCKCHAIN)
    logging.debug("Adding {} to queue".format(message))
    a.put(message)
    url = "http://" + variables.MINER_NODE_URL + ":" + str(variables.PORT) + "/blocks?update=" + User.public_key
    logging.debug("accessing url via GET")
    requests.get(url)
    logging.debug("Done accessing url")
    while True:
        last_block = variables.BLOCKCHAIN[len(variables.BLOCKCHAIN) - 1]
        # Get pending transactions from the node process
        url = "http://" + variables.MINER_NODE_URL + ":" + str(variables.PORT) + "/txion?update=" + User.public_key
        logging.debug("Getting transactions from {}".format(url))
        transactions = requests.get(url).content
        logging.debug("Done getting transactions")
        variables.PENDING_TRANSACTIONS = json.loads(transactions)
        logging.warning("type of transaction: {}".format(type(variables.PENDING_TRANSACTIONS)))
        # Add the mining reward transaction
        variables.PENDING_TRANSACTIONS.append({
            "from": "network",
            "to": User.public_key,
            "amount": 1.0})
        # Mine using the updated blockchain
        pow, pow_output = proof_of_work(a, last_block, variables.PENDING_TRANSACTIONS)
        variables.PENDING_TRANSACTIONS = []
        if pow:
            logging.info("Mined a block {}".format(pow_output))
            variables.BLOCKCHAIN.append(pow_output)
        else:
            logging.info("Consensus returned a blockchain {}".format(pow_output))
            variables.BLOCKCHAIN = pow_output
        logging.debug("Adding that blockchain to the Queue")
        a.put(["mine", variables.BLOCKCHAIN])
        url = "http://" + variables.MINER_NODE_URL + ":" + str(variables.PORT) + "/blocks?update=" + User.public_key
        logging.debug("accessing url via GET")
        requests.get(url)
        logging.debug("Done accessing url")
def proof_of_work(last_block, data):
    index_to_use = last_block.index + 1
    func = inspect.currentframe().f_back.f_code
    logging.info("Starting proof of work")
    done = False
    while not done:
        now = time.time()
        effort, pow_hash_object = Utility.genhash(index_to_use, now, data, last_block.hash)
        leading_zeroes = Utility.leadingzeroes(pow_hash_object.digest())
        if leading_zeroes >= variables.WORK:
            done = True
    retBlock = Block(index_to_use, now, pow_hash_object.hexdigest(), effort, data, last_block.hash)
    logging.info("Farmed a block returning: {}".format(retBlock))
    return retBlock
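# The proof-of-work loops above accept a block once Utility.leadingzeroes(digest) >= WORK.
# A minimal, hypothetical stand-in for that helper (the project's real implementation is
# not shown here) counts the leading zero bits of a hash digest:
import hashlib

def leading_zero_bits(digest):
    """Count leading zero bits in a bytes digest."""
    bits = 0
    for byte in digest:
        if byte == 0:
            bits += 8
            continue
        bits += 8 - byte.bit_length()  # zeros inside the first non-zero byte
        break
    return bits

print(leading_zero_bits(hashlib.sha256(b"example").digest()))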
def find_new_chains():
    func = inspect.currentframe().f_back.f_code
    logging.info("Starting to find new chains")
    # Get the blockchains of every other node
    peers = variables.PEER_NODES
    logging.debug("peers: {}".format(len(peers)))
    other_chains = []
    # Iterate over a copy so unreachable peers can be removed from the live list safely.
    for node_url in list(peers):
        blockchain_json = None
        found_blockchain = []
        url = "http://" + node_url + ":" + str(variables.PORT) + "/blocks"
        try:
            logging.debug("Attempting to access {}".format(node_url))
            blockchain_json = requests.post(url)
        except requests.exceptions.RequestException:
            logging.warning("Failed to access {}, removing from peers".format(node_url))
            variables.PEER_NODES.remove(node_url)
            continue
        # Convert the JSON object to a Python dictionary
        if blockchain_json is not None:
            blockchain_json = json.loads(blockchain_json.content)
            for block_json in blockchain_json:
                temp = Block()
                temp.importjson(block_json)
                if Utility.validate(temp):
                    logging.debug("Block validated, adding")
                    found_blockchain.append(temp)
                else:
                    logging.warning("Block NOT valid, next peer")
                    continue
            # Verify the other node's chain is correct
            logging.debug("Attempting to validate this: {}".format(found_blockchain))
            validated = Utility.validate_blockchain(found_blockchain)
            if validated:
                logging.debug("Blockchain did validate")
                other_chains.append(found_blockchain)
            else:
                logging.warning("Blockchain did not validate")
                continue
    logging.info("Ending find new chains")
    return other_chains
def proof_of_work(last_block, data):
    global event
    func = inspect.currentframe().f_back.f_code
    done = False
    now = None
    pow_hash_object = None
    effort = None
    while not done:
        # Abort if another process signalled that a valid block arrived from a peer.
        if event.is_set():
            print("Exiting block creation")
            return False
        now = time.time()
        index_to_use = last_block.index + 1
        effort, pow_hash_object = Utility.genhash(index_to_use, now, data, last_block.hash)
        leading_zeroes = Utility.leadingzeroes(pow_hash_object.digest())
        if leading_zeroes >= Variables.WORK:
            done = True
    return_block = Block(index_to_use, now, pow_hash_object.hexdigest(), effort, data, last_block.hash)
    return return_block
def generator_fix_length_vars(self, values, name, timestamp, length=200):
    vals = []
    pkg_length = 0
    for group in self.groups:
        if name != group.name:
            continue
        for var in group.vars:
            key = var.id if var.calc_mode == 'instant' else "%s.%s" % (group.name, var.id)
            if key in values.keys():
                val = dict()
                val['value'] = values[key]
                val['id'] = var.id
                val['timestamp'] = Utility.getTimeStr(timestamp)
                val['endTime'] = Utility.getTimeStr(timestamp)
                vals.append(val)
                pkg_length += 1
                # Yield a full package once it reaches `length` items, then start a new one.
                if pkg_length >= length:
                    pkg_length = 0
                    yield vals, group.name
                    vals = self.clear_list(vals)
        # Yield whatever is left for this group (possibly an empty list).
        yield vals, group.name
        pkg_length = 0
        vals = self.clear_list(vals)
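# The generator above batches variable readings into packages of at most `length` items and
# always emits a final, possibly empty, package per group. A minimal sketch of the same
# chunking pattern on a plain list, independent of the project's classes:
def chunked(items, length=200):
    """Yield lists of at most `length` items, plus a final (possibly empty) tail."""
    batch = []
    for item in items:
        batch.append(item)
        if len(batch) >= length:
            yield batch
            batch = []
    yield batch  # mirrors the trailing `yield vals, group.name` above

for part in chunked(range(5), length=2):
    print(part)  # -> [0, 1], [2, 3], [4]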
def block():
    global blockchain
    ip = request.remote_addr
    if ip != "127.0.0.1" and ip not in Variables.PEER_NODES:
        Variables.PEER_NODES.append(ip)
    if ip == '127.0.0.1':
        # The local miner submitted a freshly mined block: add it and broadcast it to peers.
        raw = request.data.decode('utf-8')
        parsed = xmltodict.parse(raw)
        b = Block()
        b.import_from_xml(parsed['block'])
        print("I made", b)
        blockchain.add(b)
        # Distribute the block to our peers (iterate over a copy so removal is safe)
        for peer in list(Variables.PEER_NODES):
            try:
                url = "http://" + peer + ":" + str(Variables.PORT) + "/block"
                xml = b.export_to_xml()
                headers = {'Content-Type': 'application/xml'}
                resp = requests.post(url, data=xml, headers=headers).text
            except requests.exceptions.RequestException:
                Variables.PEER_NODES.remove(peer)
    else:
        block_number = None
        try:
            block_number = int(request.args['block_number'])
        except (KeyError, ValueError):
            pass
        if block_number is not None:
            # A peer asked for a specific block.
            return blockchain.get(block_number).export_to_xml()
        else:
            # A peer pushed a block to us: validate it and stop our own mining attempt.
            raw = request.data.decode('utf-8')
            parsed = xmltodict.parse(raw)
            b = Block()
            b.import_from_xml(parsed['block'])
            print("received", b)
            if Utility.validate(b):
                blockchain.add(b)
                global event
                event.set()
            else:
                print("Block did not validate", ip)
    return "0"
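# The route above calls event.set() when a peer's block validates, and the proof-of-work
# loop polls event.is_set() to abandon its own attempt. A minimal sketch of that signalling
# pattern, assuming only the standard multiprocessing module:
from multiprocessing import Event, Process
import time

def miner(stop_event):
    while not stop_event.is_set():
        time.sleep(0.1)  # stand-in for one hashing attempt
    print("Exiting block creation")

if __name__ == "__main__":
    stop = Event()
    worker = Process(target=miner, args=(stop,))
    worker.start()
    time.sleep(0.5)
    stop.set()    # what block() does once a received block validates
    worker.join()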
def scrapPage(self, url, verbose=False):
    if verbose:
        print(url)
    soup = self.loadSoup(url)
    soup = self.sanitize(soup)
    # Split strings by spaces and newlines
    rawStrings = self.splitStrings(soup)
    # Apply filters
    processedStrings = self.preprocess(rawStrings, removeLongEntries=True)
    # Get pairs (word, count)
    pairs = Utility.countPairs(processedStrings)
    if verbose:
        print('|')
    imageSoup = self.loadSoup(url, 'lxml')
    return pairs, self.getImagesCount(imageSoup)
def get_blocks():
    func = inspect.currentframe().f_back.f_code
    ip = request.remote_addr
    logging.info("/blocks accessed from {} via {}".format(ip, request.method))
    if request.method == 'POST':
        if str(ip) != "127.0.0.1" and ip not in variables.PEER_NODES:
            logging.debug("We didn't know that IP, adding it to Q")
            message = Utility.buildmessage("ip", ip)
            logging.debug("message: {}".format(message))
            q.put(message)
    # Load current blockchain. Only you should update your blockchain
    qfrom = "other"
    if request.args.get("update") == User.public_key:
        logging.debug("update was from our public key, so we update our blockchain")
        qget = q.get()
        logging.debug("qget is {}".format(qget))
        qfrom = qget[0]
        variables.BLOCKCHAIN = qget[1]
        logging.info("Done updating our blockchain")
        return "200"
    else:
        chain_to_send = variables.BLOCKCHAIN
        logging.debug("Chain to send:{}".format(chain_to_send))
        logging.debug("request was not from us, we need to give them our blockchain")
        # Convert our blocks into dictionaries so we can send them as JSON objects later
        chain_to_send_json = []
        for block in chain_to_send:
            logging.debug("block to send TYPE:{} details:{}".format(type(block), block))
            try:
                chain_to_send_json.append(block.exportjson())
            except AttributeError:
                logging.error("This is not a block {}".format(block))
        # Send our chain to whomever requested it
        chain_to_send = json.dumps(chain_to_send_json)
        logging.debug("Sending {}".format(chain_to_send))
        logging.info("Done sending out our blockchain")
        return chain_to_send
def transaction():
    func = inspect.currentframe().f_back.f_code
    # TODO add logging to transactions; currently we can't send and receive blocks. One problem at a time.
    if request.method == 'POST':
        # On each new POST request, we extract the transaction data
        new_txion = request.get_json()
        # Then we add the transaction to our list
        if Utility.validate_signature(new_txion['from'], new_txion['signature'], new_txion['message']):
            variables.PENDING_TRANSACTIONS.append(new_txion)
            # Because the transaction was successfully submitted, we log it to our console
            print("New transaction")
            print("FROM: {0}".format(new_txion['from']))
            print("TO: {0}".format(new_txion['to']))
            print("AMOUNT: {0}\n".format(new_txion['amount']))
            # Push the transaction to all other available nodes
            for node_url in variables.PEER_NODES:
                if node_url != request.remote_addr:
                    try:
                        headers = {"Content-Type": "application/json"}
                        requests.post(node_url + ":" + str(User.PORT) + "/txion",
                                      json=new_txion, headers=headers)
                    except requests.exceptions.RequestException:
                        pass
            # Then we let the client know it worked out
            return "Transaction submission successful\n"
        else:
            return "Transaction submission failed. Wrong signature\n"
    # Send pending transactions to the mining process
    elif request.method == 'GET' and request.args.get("update") == User.public_key:
        pending = json.dumps(variables.PENDING_TRANSACTIONS)
        # Empty transaction list
        variables.PENDING_TRANSACTIONS = []
        return pending
def consensus():
    func = inspect.currentframe().f_back.f_code
    logging.info("Starting Consensus")
    peers = variables.PEER_NODES
    logging.debug("Peers: {}".format(peers))
    if len(peers) == 0:
        logging.info("Ending consensus, we have no peers")
        return False
    # Get the blocks from other nodes
    other_chains = find_new_chains()
    if len(other_chains) == 0:
        logging.debug("no chains found")
        logging.info("Ending consensus, no other chains found")
        return False
    # Handle the case where a single chain of blocks was returned rather than a list of chains.
    if type(other_chains[0]) != type([]):
        if len(other_chains) < len(variables.BLOCKCHAIN):
            logging.debug("Our blockchain is bigger")
            logging.info("Ending consensus, rejecting others")
            return False
        else:
            logging.info("Ending consensus, we have one peer with a longer blockchain")
            return other_chains
    # If our chain isn't longest, then we store the longest chain
    longest = 0
    max_length = 0
    for i in range(len(other_chains)):
        if Utility.validate_blockchain(other_chains[i]):
            chain_length = len(other_chains[i])
            if chain_length > max_length:
                max_length = chain_length
                longest = i
    if len(other_chains[longest]) == len(variables.BLOCKCHAIN):
        logging.debug("Our blockchain is the same size")
        logging.info("Ending consensus, rejecting others")
        return False
    logging.info("Ending Consensus with a chain")
    logging.debug("Consensus returned: {}".format(other_chains[longest]))
    return other_chains[longest]
def getFeature(self, train_query_df, product_df, attribute_df, test_query_df, features="brand,attribute,spelling,nonascii,stopwords,colorExist,color_onehot,brandExist,wmdistance,stemming,word2vec,Word2VecQueryExpansion,tfidf,tfidf_expandedquery,doc2vec,doc2vec_expandedquery,bm25,bm25expandedquery,doclength"): ## Please feel free to add feature into this method. ## For testing, you may want to comment out some feature generation to save time ## as some takes a long time to run. timetracker=Utility() if features.find("brand") != -1: # Create Brand Column product_df = self.__createBrandColumn(product_df, attribute_df) if features.find("attribute") != -1: # Create Attribute column as a JSON string # Column name is attr_json product_df = self.__createAttributeColumn(product_df, attribute_df) if features.find("spelling") != -1: # Perform spell correction on search_term print("Performing spell correction") spell_dict = Feature_Spelling.getSpellingCorrectionDict() # print(self.__spell_correction('lifeswivel', spell_dict)) train_query_df['search_term'] = train_query_df['search_term'].map( lambda x: self.__spell_correction(x, spell_dict)) product_df['product_description'] = product_df['product_description'].map( lambda x: self.__spell_correction(x, spell_dict)) product_df['product_title'] = product_df['product_title'].map( lambda x: self.__spell_correction(x, spell_dict)) product_df['attr_json'] = product_df['attr_json'].map( lambda x: self.__spell_correction(str(x), spell_dict)) if features.find("nonascii") != -1: # Remove non-ascii characters print("Performing non-ascii removal") start_time = time.time() train_query_df['search_term'] = train_query_df['search_term'].map(lambda x: self.__nonascii_clean((x))) print("Non-ascii clean on search_term took: %s minutes" % round(((time.time() - start_time) / 60), 2)) product_df['product_title'] = product_df['product_title'].map(lambda x: self.__nonascii_clean(str(x))) print("Non-ascii clean on product_title took: %s minutes" % round(((time.time() - start_time) / 60), 2)) # Run this to download the download the stopword list if you hit error # nltk.download() if features.find("stopwords") != -1: # Stopwords removal print("Performing stopwords removal") start_time = time.time() train_query_df['search_term'] = train_query_df['search_term'].map(lambda x: self.__stopword_removal((x))) print("stopwords removal on search_term took: %s minutes" % round(((time.time() - start_time) / 60), 2)) product_df['product_title'] = product_df['product_title'].map(lambda x: self.__stopword_removal(str(x))) print("stopwords removal on product_title took: %s minutes" % round(((time.time() - start_time) / 60), 2)) product_df['product_description'] = product_df['product_description'].map(lambda x: self.__stopword_removal(str(x))) print("stopwords removal on product_description took: %s minutes" % round(((time.time() - start_time) / 60), 2)) product_df['attr_json'] = product_df['attr_json'].map(lambda x: self.__stopword_removal(str(x))) print("stopwords removal on attr_jason took: %s minutes" % round(((time.time() - start_time) / 60), 2)) if features.find("colorExist") != -1: # Check if color in search_term exist in product_description column print("Performing color and material check") start_time = time.time() color = Feature_ColorMaterial() train_query_df['color'] = color.checkColorMaterialExists(train_query_df, product_df) train_query_df['color_exist'] = train_query_df['color'].map(lambda x: 1 if len(x)>0 else 0) # Save some memory. 
Change it to uint8 train_query_df.color_exist = train_query_df.color_exist.astype(np.uint8) if features.find("color_onehot") != -1: train_query_df = self.__onehot_color(train_query_df) # Clean up unused column train_query_df.pop('color') print("Color and material check took: %s minutes" % round(((time.time() - start_time) / 60), 2)) if features.find("brandExist") != -1: # Check if brand in search term exist product_brand column print("Performing brand check") start_time = time.time() train_query_df['brand_exist'] = self.__brandExist(train_query_df, product_df) # train_query_df['brand_exist'] = train_query_df['search_term'].map(lambda x: 1 if len(x)>0 else 0) print("Brand check took: %s minutes" % round(((time.time() - start_time) / 60), 2)) if features.find('wmdistance') != -1: print("Performing Word Mover Distance") start_time = time.time() wm = Feature_WordMoverDistance() train_query_df['wm_product_description'] = wm.getDistance(train_query_df, 'search_term', product_df, 'product_description') print("WMDistance for product_description took: %s minutes" % round(((time.time() - start_time) / 60), 2)) train_query_df['wm_product_title'] = wm.getDistance(train_query_df, 'search_term', product_df, 'product_title') print("WMDistance for product_title took: %s minutes" % round(((time.time() - start_time) / 60), 2)) train_query_df['wm_product_brand'] = wm.getDistance(train_query_df, 'search_term', product_df, 'product_brand') print("WMDistance for product_brand took: %s minutes" % round(((time.time() - start_time) / 60), 2)) train_query_df['wm_attr_json'] = wm.getDistance(train_query_df, 'search_term', product_df, 'attr_json') print("WMDistance for attr_json took: %s minutes" % round(((time.time() - start_time) / 60), 2)) if features.find("stemming") != -1: # # Stemming print("Performing Stemming") start_time = time.time() train_query_df['search_term'] = train_query_df['search_term'].map(lambda x: self.__stemming((x))) print("Stemming search_term took: %s minutes" % round(((time.time() - start_time) / 60), 2)) product_df['product_title'] = product_df['product_title'].map(lambda x: self.__stemming(str(x))) print("Stemming product_title took: %s minutes" % round(((time.time() - start_time) / 60), 2)) product_df['product_brand'] = product_df['product_brand'].map(lambda x: self.__stemming(str(x))) print("Stemming product_brand took: %s minutes" % round(((time.time() - start_time) / 60), 2)) product_df['product_description'] = product_df['product_description'].map(lambda x: self.__stemming(str(x))) print("Stemming product_description took: %s minutes" % round(((time.time() - start_time) / 60), 2)) product_df['attr_json'] = product_df['attr_json'].map(lambda x: self.__stemming(str(x))) print("Stemming attr_json took: %s minutes" % round(((time.time() - start_time) / 60), 2)) if features.find("word2vec") != -1: # Word2Vec print("===========Performing word2vec computation....this may take a while") timetracker.startTimeTrack() print("Merging product_title and description") print(list(product_df)) product_df['content'] = product_df['product_title'].map(str) + " " + \ product_df['product_description'].map(str) + " " + \ product_df['product_brand'].map(str) timetracker.checkpointTimeTrack() print("Adding training query for that product id into the content") product_df = product_df.reset_index(drop=True) counter = 0 for index, product in product_df.iterrows(): # print("product:", product) productId = product['product_uid'] # print("productId:",productId) df = train_query_df[train_query_df.product_uid == 
productId] # print("df:",df) searchterms = "" for index, row in df.iterrows(): searchterm = row['search_term'] searchterms = searchterms + " " + searchterm newString = product_df.iloc[counter]['content'] + " " + searchterms product_df.set_value(counter, 'content', newString) counter = counter + 1 timetracker.checkpointTimeTrack() w2v = Feature_Word2Vec.Feature_Word2Vec() print("Convert DF into sentences for word2vec processing") sentences = w2v.convertDFIntoSentences(product_df, 'content') timetracker.checkpointTimeTrack() print("Training word2vec") w2v.trainModel(sentences) timetracker.checkpointTimeTrack() print("Validating...this should give some results like sofa") print(w2v.getVectorFromWord('stool')) print(w2v.getSimilarWordVectors('stool', 5)) print("===========Completed word2vec computation") ##WARNING: This has to be before bm25expandedquery function call if features.find("Word2VecQueryExpansion") != -1: # Word2VecQueryExpansion print("===========Performing Word2VecQueryExpansion computation....this may take a super long time") timetracker.startTimeTrack() # print("Merging product_title and description") # print(list(product_df)) # product_df['content']=product_df['product_title'].map(str) +" "+ \ # product_df['product_description'].map(str) + " " + \ # product_df['product_brand'].map(str) # product_df.head(1) print("Compute Word2VecQueryExpansion") w2cExpand = Word2VecQueryExpansion() timetracker.checkpointTimeTrack() # print("Remove merged column") # product_df=product_df.drop('content', axis=1) # For every training query-document pair, generate bm25 print("Generate Word2VecQueryExpansion column") train_query_df = w2cExpand.computeExpandedQueryColumn(trainset=train_query_df, colName='Word2VecQueryExpansion') timetracker.checkpointTimeTrack() print("train_query_df:", list(train_query_df)) print("train_query_df head:", train_query_df.head(1)) print("Saving to csv") train_query_df.to_csv('../data.prune/train_query_with_Word2VecQueryExpansion.csv') timetracker.checkpointTimeTrack() print("===========Completed Word2VecQueryExpansion computation") if features.find("tfidf") != -1: # TF-IDF print("Performing TF-IDF") tfidf = Feature_TFIDF() train_query_df['tfidf_product_title'] = tfidf.getCosineSimilarity(train_query_df, 'search_term', product_df, 'product_title') train_query_df['tfidf_product_brand'] = tfidf.getCosineSimilarity(train_query_df, 'search_term', product_df, 'product_brand') train_query_df['tfidf_product_description'] = tfidf.getCosineSimilarity(train_query_df, 'search_term', product_df, 'product_description') train_query_df['tfidf_attr_json'] = tfidf.getCosineSimilarity(train_query_df, 'search_term', product_df, 'attr_json') if features.find("tfidf_expandedquery") != -1: # TF-IDF on expanded query print("Performing TF-IDF with expanded query") tfidf = Feature_TFIDF() train_query_df['tfidf_expanded_product_title'] = tfidf.getCosineSimilarity(train_query_df, 'Word2VecQueryExpansion', product_df, 'product_title') train_query_df['tfidf_expanded_product_brand'] = tfidf.getCosineSimilarity(train_query_df, 'Word2VecQueryExpansion', product_df, 'product_brand') train_query_df['tfidf_expanded_product_description'] = tfidf.getCosineSimilarity(train_query_df, 'Word2VecQueryExpansion', product_df, 'product_description') train_query_df['tfidf_expanded_attr_json'] = tfidf.getCosineSimilarity(train_query_df, 'Word2VecQueryExpansion', product_df, 'attr_json') if features.find("doc2vec") != -1: # Doc2Vec print("Performing Doc2Vec") doc2vec = Feature_Doc2Vec() 
train_query_df['doc2vec_product_title'] = doc2vec.getCosineSimilarity(train_query_df, 'search_term', product_df, 'product_title') doc2vec = Feature_Doc2Vec() train_query_df['doc2vec_product_brand'] = doc2vec.getCosineSimilarity(train_query_df, 'search_term', product_df, 'product_brand') doc2vec = Feature_Doc2Vec() train_query_df['doc2vec_product_description'] = doc2vec.getCosineSimilarity(train_query_df, 'search_term', product_df, 'product_description') doc2vec = Feature_Doc2Vec() train_query_df['doc2vec_attr_json'] = doc2vec.getCosineSimilarity(train_query_df, 'search_term', product_df, 'attr_json') if features.find("doc2vec_expandedquery") != -1: # Doc2Vec print("Performing Doc2Vec with expanded query") doc2vec = Feature_Doc2Vec() train_query_df['doc2vec_expanded_product_title'] = doc2vec.getCosineSimilarity(train_query_df, 'Word2VecQueryExpansion', product_df, 'product_title') doc2vec = Feature_Doc2Vec() train_query_df['doc2vec_expanded_product_brand'] = doc2vec.getCosineSimilarity(train_query_df, 'Word2VecQueryExpansion', product_df, 'product_brand') doc2vec = Feature_Doc2Vec() train_query_df['doc2vec_expanded_product_description'] = doc2vec.getCosineSimilarity(train_query_df, 'Word2VecQueryExpansion', product_df, 'product_description') doc2vec = Feature_Doc2Vec() train_query_df['doc2vec_expanded_attr_json'] = doc2vec.getCosineSimilarity(train_query_df, 'Word2VecQueryExpansion', product_df, 'attr_json') if features.find("bm25") != -1: # BM25 print("===========Performing BM25 computation....this may take a while") timetracker.startTimeTrack() print("Merging product_title and description") print(list(product_df)) product_df['content']=product_df['product_title'].map(str) +" "+ \ product_df['product_description'].map(str) + " " + \ product_df['product_brand'].map(str) timetracker.checkpointTimeTrack() print("Adding training query for that product id into the content") product_df=product_df.reset_index(drop=True) counter=0 for index,product in product_df.iterrows(): # print("product:", product) productId=product['product_uid'] # print("productId:",productId) df=train_query_df[train_query_df.product_uid==productId] # print("df:",df) searchterms="" for index,row in df.iterrows(): searchterm=row['search_term'] searchterms=searchterms+" "+searchterm newString=product_df.iloc[counter]['content']+" "+searchterms product_df.set_value(counter,'content',newString) counter=counter+1 timetracker.checkpointTimeTrack() print("Compute BM25") bm25 = Feature_BM25(product_df) timetracker.checkpointTimeTrack() print("Remove merged column") product_df=product_df.drop('content', axis=1) #For every training query-document pair, generate bm25 print("Generate bm25 column") train_query_df=bm25.computeBM25Column(trainset=train_query_df,destColName='bm25', searchTermColname='search_term') timetracker.checkpointTimeTrack() print("train_query_df:",list(train_query_df)) print("train_query_df head:",train_query_df.head(1)) print("Saving to csv") train_query_df.to_csv('../data.prune/train_query_with_bm25_search_term.csv') timetracker.checkpointTimeTrack() print("===========Completed BM25 computation") if features.find("bm25expandedquery") != -1: if features.find("Word2VecQueryExpansion") != -1: # bm25expandedquery print("===========Performing BM25expanded computation....this may take a while") timetracker.startTimeTrack() print("Merging product_title and description") print(list(product_df)) product_df['content']=product_df['product_title'].map(str) +" "+ \ product_df['product_description'].map(str) + " " + \ 
product_df['product_brand'].map(str) product_df.head(1) timetracker.checkpointTimeTrack() print("Adding training query for that product id into the content") product_df = product_df.reset_index(drop=True) counter = 0 for index, product in product_df.iterrows(): # print("product:", product) productId = product['product_uid'] # print("productId:",productId) df = train_query_df[train_query_df.product_uid == productId] # print("df:",df) searchterms = "" for index, row in df.iterrows(): searchterm = row['search_term'] searchterms = searchterms + " " + searchterm newString = product_df.iloc[counter]['content'] + " " + searchterms product_df.set_value(counter, 'content', newString) counter = counter + 1 timetracker.checkpointTimeTrack() print("Compute BM25") bm25 = Feature_BM25(product_df) timetracker.checkpointTimeTrack() print("Remove merged column") product_df=product_df.drop('content', axis=1) #For every training query-document pair, generate bm25 print("Generate bm25 column") train_query_df=bm25.computeBM25Column(trainset=train_query_df,destColName='bm25expandedquery', searchTermColname='Word2VecQueryExpansion') timetracker.checkpointTimeTrack() print("train_query_df:",list(train_query_df)) print("train_query_df head:",train_query_df.head(1)) print("Saving to csv") train_query_df.to_csv('../data.prune/train_query_with_bm25_Word2VecQueryExpansion.csv') timetracker.checkpointTimeTrack() print("===========Completed BM25expanded computation") else: print("ERROR: Cannot proceed with bm25expandedquery. Word2VecQueryExpansion is not enabled. It is a prerequisite of bm25expandedquery.") if features.find("bm25description") != -1: if features.find("Word2VecQueryExpansion") != -1: # bm25expandedquery print("===========Performing bm25description computation....this may take a while") timetracker.startTimeTrack() print(list(product_df)) # product_df['content']=product_df['product_title'].map(str) +" "+ \ # product_df['product_description'].map(str) + " " + \ # product_df['product_brand'].map(str) product_df['content']=product_df['product_description'].map(str) product_df.head(1) timetracker.checkpointTimeTrack() print("Adding training query for that product id into the content") product_df = product_df.reset_index(drop=True) counter = 0 for index, product in product_df.iterrows(): # print("product:", product) productId = product['product_uid'] # print("productId:",productId) df = train_query_df[train_query_df.product_uid == productId] # print("df:",df) searchterms = "" for index, row in df.iterrows(): searchterm = row['search_term'] searchterms = searchterms + " " + searchterm newString = product_df.iloc[counter]['content'] + " " + searchterms product_df.set_value(counter, 'content', newString) counter = counter + 1 timetracker.checkpointTimeTrack() print("Compute BM25") bm25 = Feature_BM25(product_df) timetracker.checkpointTimeTrack() print("Remove merged column") product_df=product_df.drop('content', axis=1) #For every training query-document pair, generate bm25 print("Generate bm25 column") train_query_df=bm25.computeBM25Column(trainset=train_query_df,destColName='bm25description', searchTermColname='Word2VecQueryExpansion') timetracker.checkpointTimeTrack() print("train_query_df:",list(train_query_df)) print("train_query_df head:",train_query_df.head(1)) print("Saving to csv") train_query_df.to_csv('../data.prune/train_query_with_bm25_Word2VecQueryExpansion.csv') timetracker.checkpointTimeTrack() print("===========Completed bm25description computation") else: print("ERROR: Cannot proceed with 
bm25description. Word2VecQueryExpansion is not enabled. It is a prerequisite of bm25expandedquery.") if features.find("bm25title") != -1: if features.find("Word2VecQueryExpansion") != -1: # bm25expandedquery print("===========Performing bm25title computation....this may take a while") timetracker.startTimeTrack() print(list(product_df)) # product_df['content']=product_df['product_title'].map(str) +" "+ \ # product_df['product_description'].map(str) + " " + \ # product_df['product_brand'].map(str) product_df['content']=product_df['product_title'].map(str) product_df.head(1) timetracker.checkpointTimeTrack() print("Adding training query for that product id into the content") product_df = product_df.reset_index(drop=True) counter = 0 for index, product in product_df.iterrows(): # print("product:", product) productId = product['product_uid'] # print("productId:",productId) df = train_query_df[train_query_df.product_uid == productId] # print("df:",df) searchterms = "" for index, row in df.iterrows(): searchterm = row['search_term'] searchterms = searchterms + " " + searchterm newString = product_df.iloc[counter]['content'] + " " + searchterms product_df.set_value(counter, 'content', newString) counter = counter + 1 timetracker.checkpointTimeTrack() print("Compute BM25") bm25 = Feature_BM25(product_df) timetracker.checkpointTimeTrack() print("Remove merged column") product_df=product_df.drop('content', axis=1) #For every training query-document pair, generate bm25 print("Generate bm25 column") train_query_df=bm25.computeBM25Column(trainset=train_query_df,destColName='bm25title', searchTermColname='Word2VecQueryExpansion') timetracker.checkpointTimeTrack() print("train_query_df:",list(train_query_df)) print("train_query_df head:",train_query_df.head(1)) print("Saving to csv") train_query_df.to_csv('../data.prune/train_query_with_bm25_Word2VecQueryExpansion.csv') timetracker.checkpointTimeTrack() print("===========Completed bm25title computation") else: print("ERROR: Cannot proceed with bm25title. Word2VecQueryExpansion is not enabled. 
It is a prerequisite of bm25expandedquery.") if features.find("bm25brand") != -1: if features.find("Word2VecQueryExpansion") != -1: # bm25expandedquery print("===========Performing bm25brand computation....this may take a while") timetracker.startTimeTrack() print(list(product_df)) # product_df['content']=product_df['product_title'].map(str) +" "+ \ # product_df['product_description'].map(str) + " " + \ # product_df['product_brand'].map(str) product_df['content']=product_df['product_brand'].map(str) product_df.head(1) timetracker.checkpointTimeTrack() print("Adding training query for that product id into the content") product_df = product_df.reset_index(drop=True) counter = 0 for index, product in product_df.iterrows(): # print("product:", product) productId = product['product_uid'] # print("productId:",productId) df = train_query_df[train_query_df.product_uid == productId] # print("df:",df) searchterms = "" for index, row in df.iterrows(): searchterm = row['search_term'] searchterms = searchterms + " " + searchterm newString = product_df.iloc[counter]['content'] + " " + searchterms product_df.set_value(counter, 'content', newString) counter = counter + 1 timetracker.checkpointTimeTrack() print("Compute BM25") bm25 = Feature_BM25(product_df) timetracker.checkpointTimeTrack() print("Remove merged column") product_df=product_df.drop('content', axis=1) #For every training query-document pair, generate bm25 print("Generate bm25 column") train_query_df=bm25.computeBM25Column(trainset=train_query_df,destColName='bm25brand', searchTermColname='Word2VecQueryExpansion') timetracker.checkpointTimeTrack() print("train_query_df:",list(train_query_df)) print("train_query_df head:",train_query_df.head(1)) print("Saving to csv") train_query_df.to_csv('../data.prune/train_query_with_bm25_Word2VecQueryExpansion.csv') timetracker.checkpointTimeTrack() print("===========Completed bm25brand computation") else: print("ERROR: Cannot proceed with bm25brand. Word2VecQueryExpansion is not enabled. 
It is a prerequisite of bm25expandedquery.") if features.find("doclength") != -1: # Document Length print("Performing Document Length") product_df['len_product_title'] = product_df['product_title'].map(lambda x: len(homedepotTokeniser(x))) train_query_df = pd.merge(train_query_df, product_df[['product_uid', 'len_product_title']], how='left', on='product_uid') product_df['len_product_description'] = product_df['product_description'].map(lambda x: len(homedepotTokeniser(x))) train_query_df = pd.merge(train_query_df, product_df[['product_uid', 'len_product_description']], how='left', on='product_uid') product_df['len_brand'] = product_df['product_brand'].map(lambda x: len(homedepotTokeniser(x))) train_query_df = pd.merge(train_query_df, product_df[['product_uid', 'len_brand']], how='left', on='product_uid') train_query_df['len_search_term'] = train_query_df['search_term'].map(lambda x: len(homedepotTokeniser(x))) if features.find("pmi") != -1: print("===========Performing pmi computation....this may take a while") timetracker.startTimeTrack() print(list(product_df)) product_df['content'] = product_df['product_title'].map(str) + " " + \ product_df['product_description'].map(str) timetracker.checkpointTimeTrack() print("Adding training query for that product id into the content") product_df = product_df.reset_index(drop=True) counter = 0 for index, product in product_df.iterrows(): # print("product:", product) productId = product['product_uid'] # print("productId:",productId) df = train_query_df[train_query_df.product_uid == productId] # print("df:",df) searchterms = "" for index, row in df.iterrows(): searchterm = row['search_term'] searchterms = searchterms + " " + searchterm newString = product_df.iloc[counter]['content'] + " " + searchterms product_df.set_value(counter, 'content', newString) counter = counter + 1 timetracker.checkpointTimeTrack() # Creating content text = product_df['content'].str.cat(sep=' ') pmiFeature = Feature_PMI.Feature_PMI(text) # print("PMI 'kitchen','cabinet': ", pmiFeature.computePMI('kitchen', 'cabinet')) train_query_df = pmiFeature.computePMIColumn(trainset=train_query_df) # print(list(train_query_df), "\n", train_query_df['pmi']) # train_query_df.filter(items=['id', 'pmi']).to_csv('pmi_features.csv') print("train_query_df final column:\n", train_query_df.info()) return train_query_df
def __init__(self, driver):
    self.driver = driver
    Utility.__init__(self, driver)
def start(a):
    global q
    q = a
    global node
    node.config['SECRET_KEY'] = Utility.createHexdigest(User.password)
    node.run(host="0.0.0.0", port=variables.PORT)
# cols=['color_exist','len_product_description']
# getFeatureRMSEAgainstBaseline(cols)
# cols=['color_exist','len_brand']
# getFeatureRMSEAgainstBaseline(cols)
# cols=['color_exist','len_search_term']
# getFeatureRMSEAgainstBaseline(cols)
# cols=['color_exist','sense2vec_all_simscore','sense2vec_keeptag_simscore','sense2vec_uidfact_all_simscore','sense2vec_uidfact_keeptag_simscore','sense2vec_all_attr_simscore','sense2vec_keeptag_attr_simscore','sense2vec_uidfact_all_attr_simscore','sense2vec_uidfact_keeptag_attr_simscore']
# getFeatureRMSEAgainstBaseline(cols)
# cols=['color_exist','product_uid_threshold']
# getFeatureRMSEAgainstBaseline(cols)
# cols=['color_exist','noun_overlap_counts','noun_uniq_overlap_counts','noun_overlap_ratio']
# getFeatureRMSEAgainstBaseline(cols)

if __name__ == "__main__":
    # print("Should not print")
    utility = Utility()
    utility.startTimeTrack()

    # This part skips the feature training and simply uses an existing feature set.
    # print("Reading features_full_plusnouns set")
    # all_df=pd.read_csv('../data/features_full_plusnouns_pluspuidthresh.csv')
    myFeatureSetFileReference = '../data/features_doc2vec_sense2vec_pmi_20170418.csv'
    print("Reading features_doc2vec_sense2vec_pmi_20170418 set")
    all_df = pd.read_csv(myFeatureSetFileReference, low_memory=True)
    print("Completed: Reading features_doc2vec_sense2vec_pmi_20170418 set")
    feature_train_df = all_df[:74067]
    # feature_train_df.drop('doc2vec_search_term_vector', axis=1, inplace=True)
    # feature_train_df.drop('doc2vec_product_title_vector', axis=1, inplace=True)
    # feature_train_df.drop('doc2vec_product_brand_vector', axis=1, inplace=True)
def exeFMBidModel(testDF=None, validateDF=None, trainDF=None, trainReader=None, validationReader=None, testReader=None, writeResult2CSV=False): print("============ Factorisation Machine bid model....setting up") timer = Utility() timer.startTimeTrack() print("Getting encoded datasets") trainOneHotData, trainY = trainReader.getOneHotData() validationOneHotData, valY = validationReader.getOneHotData(train_cols=trainOneHotData.columns.get_values().tolist()) testOneHotData, testY = testReader.getOneHotData(train_cols=trainOneHotData.columns.get_values().tolist()) timer.checkpointTimeTrack() print("trainOneHotData:",trainOneHotData.shape,list(trainOneHotData)) print("trainY:", trainY.shape, list(trainY)) print("validationOneHotData:",validationOneHotData.shape,list(validationOneHotData)) print("valY:", valY.shape, list(valY)) fmBidModel=FMBidModel.FMBidModel(cBudget=6250 * 1000, modelType='fmclassificationsgd') print("==========Training starts") # fmBidModel.gridSearchandCrossValidateFastSGD(trainOneHotData, trainY) # timer.checkpointTimeTrack() fmBidModel.trainModel(trainOneHotData,trainY, retrain=True, modelFile="data.pruned/fmclassificationsgd.pkl") timer.checkpointTimeTrack() print("==========Validation starts") predictedProb=fmBidModel.validateModel(validationOneHotData, valY) timer.checkpointTimeTrack() # print("==========Bid optimisation starts") # fmBidModel.optimiseBid(validationOneHotData,valY) # timer.checkpointTimeTrack() # best score 0.3683528286042599 # noBidThreshold 2.833333e-01 # minBid 2.000000e+02 # bidRange 9.000000e+01 # sigmoidDegree - 1.000000e+01 # won 3.432900e+04 # click 1.380000e+02 # spend 2.729869e+06 # trimmed_bids 0.000000e+00 # CTR 4.019925e-03 # CPM 7.952078e+04 # CPC 1.978166e+04 # blended_score 3.683528e-01 # best score 0.3681133881545131 # noBidThreshold 2.833333e-01 # minBid 2.000000e+02 # bidRange 1.000000e+02 # sigmoidDegree - 1.000000e+01 # won 3.449900e+04 # click 1.380000e+02 # spend 2.758561e+06 # trimmed_bids 0.000000e+00 # CTR 4.000116e-03 # CPM 7.996061e+04 # CPC 1.998957e+04 # blended_score 3.681134e-01 # New budget 6250000 # FM # best score 0.32755084132163526 # noBidThreshold 8.666667e-01 # minBid 2.000000e+02 # bidRange 2.500000e+02 # sigmoidDegree - 1.000000e+01 # won 1.461000e+04 # click 1.170000e+02 # spend 1.124960e+06 # trimmed_bids 0.000000e+00 # CTR 8.008214e-03 # CPM 7.699932e+04 # CPC 9.615043e+03 # blended_score 3.275508e-01 # print("==========Getting bids") ## 25000 budget # bidIdPriceDF=fmBidModel.getBidPrice(validationOneHotData,valY,noBidThreshold=0.2833333,minBid=200,bidRange=100,sigmoidDegree=-10) ## 6250 budget # bidIdPriceDF=fmBidModel.getBidPrice(validationOneHotData,valY,noBidThreshold=0.8666667,minBid=200,bidRange=250,sigmoidDegree=-10) # print("bidIdPriceDF:",bidIdPriceDF.shape, list(bidIdPriceDF)) # bidIdPriceDF.to_csv("mybids.csv") # timer.checkpointTimeTrack() return predictedProb
import logging

FORMAT = "[{%(levelname)s} %(filename)s:%(lineno)s - %(funcName)20s() ] %(message)s"
logging.basicConfig(filename='scratch.log', level=logging.DEBUG, format=FORMAT)

from Blockchain_classes.Blockchain import Blockchain
import time
from Blockchain_classes.Block import Block
import User_classes.User as User
import Utilities.Utility as Utility

WORK = 5
genesis = Utility.create_genesis_block()
added = 0
blockchain = Blockchain(genesis)
while added < 100:
    last_block = blockchain.last_added()
    now = time.time()
    data = [{"from": "network", "to": User.public_key, "amount": 1.0}]
    done = False
    block = None
    while not done:
        effort, pow_hash_object = Utility.genhash(last_block.index + 1, now, data, last_block.hash)
        # this is a test ....
        leading_zeroes = Utility.leadingzeroes(pow_hash_object.digest())
        if leading_zeroes >= WORK:
            done = True
            added += 1
            b = Block(last_block.index + 1, now, pow_hash_object.hexdigest(), effort,
                      data, last_block.hash)
def __get_value(self, v, r, byte_order): if r is None: return None len = self.__get_block_length(v.type) if re.match("words:", v.type, re.M | re.I): var = [] for a in r: h = ((a & 0xff00) >> 8) l = (a & 0xff) if re.search(r'(^ab($|cd$))|(^cdab$)', byte_order) is not None: var.append(h << 8 | l) else: var.append(a) # for a in Utility.toDoubleList(r): # h0 = ((a[0] & 0xff00) >> 8) # l0 = (a[0] & 0xff) # h1 = ((a[1] & 0xff00) >> 8) # l1 = (a[1] & 0xff) # d = Utility.toDWord(l0, h0, l1, h1, byte_order, v.type) # var.append(((d & 0xffff0000) >> 16)) # var.append((d & 0xffff)) # logger.debug("get words data : %s" % var) logger.debug("Get words data.") return var elif re.match("bytes:", v.type, re.M | re.I): var = [] for a in r: h = ((a & 0xff00) >> 8) l = (a & 0xff) if re.search(r'(^ab($|cd$))|(^cdab$)', byte_order) is not None: var.append(h) var.append(l) else: var.append(l) var.append(h) # logger.debug("get bytes data : %s" % var) logger.debug("Get bytes data.") return var elif re.match("string:", v.type, re.M | re.I): var = [] for a in r: h = ((a & 0xff00) >> 8) l = (a & 0xff) if re.search(r'(^ab($|cd$))|(^cdab$)', byte_order) is not None: # var.append(chr(h << 8 | l)) var.append(chr(h)) var.append(chr(l)) else: # var.append(chr(a)) var.append(chr(l)) var.append(chr(h)) # logger.debug("get string data : %s" % ''.join(var)) logger.debug("Get string data.") return ''.join(var) elif re.match("bits:", v.type, re.M | re.I): var = [] for a in r: var.append(0 if a == 0 else 1) # logger.debug("get bits data : %s" % var) logger.debug("Get bits data.") return var elif re.match("dwords:", v.type, re.M | re.I): var = [] for a in Utility.toDoubleList(r): h0 = ((a[0] & 0xff00) >> 8) l0 = (a[0] & 0xff) h1 = ((a[1] & 0xff00) >> 8) l1 = (a[1] & 0xff) var.append(Utility.toDWord(l0, h0, l1, h1, byte_order, v.type)) # logger.debug("get dwords data : %s" % var) logger.debug("Get dwords data.") return var elif re.match("floats:", v.type, re.M | re.I): var = [] for a in Utility.toDoubleList(r): h0 = ((a[0] & 0xff00) >> 8) l0 = (a[0] & 0xff) h1 = ((a[1] & 0xff00) >> 8) l1 = (a[1] & 0xff) var.append(Utility.toFloat(l0, h0, l1, h1, byte_order, v.type)) # logger.debug("get floats data : %s" % var) logger.debug("Get floats data.") return var else: pass if len == 1: if v.type == 'bit': return 0 if r[0] == 0 else 1 else: h = ((r[0] & 0xff00) >> 8) l = (r[0] & 0xff) if re.search(r'(^ba($|dc$))|(^dcba$)', byte_order) is not None: if re.search('^unsigned', v.type): t = ((l << 8) & 0xff00) | h return t else: t = (((l & 0x7f) << 8) & 0xff00) | h return t if (l & 0x80) == 0 else (t - 32768) else: if re.search('^unsigned', v.type): return r[0] else: return r[0] if (r[0] & 0x8000) == 0 else r[0] - 65536 elif len == 2: if re.match("bits:", v.type, re.M | re.I): val = [] for a in r: val.append(0 if a == 0 else 1) return val else: h0 = ((r[0] & 0xff00) >> 8) l0 = (r[0] & 0xff) h1 = ((r[1] & 0xff00) >> 8) l1 = (r[1] & 0xff) if v.type == 'float': return Utility.toFloat( l0, h0, l1, h1, byte_order, v.type) # ieee754 converting,precision:default 2 else: return Utility.toDWord(l0, h0, l1, h1, byte_order, v.type) else: var = [] for a in r: h = ((a & 0xff00) >> 8) l = (a & 0xff) if re.search(r'(^ab($|cd$))|(^cdab$)', byte_order) is not None: var.append(h) var.append(l) else: var.append(l) var.append(h) return var
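# Several branches in __get_value above swap the two bytes of a 16-bit register when the
# device byte order matches 'ba'/'dcba'. A hypothetical stand-alone helper showing just
# that swap (not the project's Utility API):
def swap_word(value):
    """Swap the high and low bytes of a 16-bit value, e.g. 0x1234 -> 0x3412."""
    high = (value & 0xFF00) >> 8
    low = value & 0x00FF
    return ((low << 8) & 0xFF00) | high

print(hex(swap_word(0x1234)))  # -> 0x3412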
        data=predicted)
    else:
        print("Error: No model was trained in this instance....")
    return predictedProb[:, 1]


if __name__ == "__main__":
    trainset = "data.final/train1_cleaned_prune.csv"
    validationset = "data.final/validation_cleaned.csv"
    testset = "data.final/test.csv"

    print("Reading dataset...")
    timer = Utility()
    timer.startTimeTrack()
    trainReader = ipinyouReader.ipinyouReader(trainset)
    validationReader = ipinyouReader.ipinyouReader(validationset)
    testReader = ipinyouReader.ipinyouReader(testset)
    timer.checkpointTimeTrack()

    print("Getting encoded datasets")
    trainOneHotData, trainY = trainReader.getOneHotData()
    validationOneHotData, valY = validationReader.getOneHotData(
        train_cols=trainOneHotData.columns.get_values().tolist())
    testOneHotData, testY = testReader.getOneHotData(
        train_cols=trainOneHotData.columns.get_values().tolist())
    timer.checkpointTimeTrack()
    print("trainOneHotData:", trainOneHotData.shape, list(trainOneHotData))
import logging

FORMAT = "[{%(levelname)s} %(filename)s:%(lineno)s - %(funcName)20s() ] %(message)s"
logging.basicConfig(filename='scratch.log', level=logging.DEBUG, format=FORMAT)

from Mining.Block import Block
import time
import User.User as User
import Utilities.Utility as Utility

WORK = 3
BLOCKCHAIN = []
BLOCKCHAIN.append(Utility.create_genesis_block())
while True:
    if len(BLOCKCHAIN) == 2500:
        break
    last_block = BLOCKCHAIN[len(BLOCKCHAIN) - 1]
    now = time.time()
    data = [{"from": "network", "to": User.public_key, "amount": 1.0}]
    done = False
    block = None
    while not done:
        effort, pow_hash_object = Utility.genhash(last_block.index + 1, now, data, last_block.hash)
        leading_zeroes = Utility.leadingzeroes(pow_hash_object.digest())
        if leading_zeroes >= WORK:
            done = True
            block = Block(last_block.index + 1, now, pow_hash_object.hexdigest(), effort, data, last_block.hash)
            BLOCKCHAIN.append(block)
Utility.validate_blockchain(BLOCKCHAIN)
def preprocess( self, strings, verbose=False, removeNumericStrings=True, stringsRemovalThreshold=0.1, removeLinks=True, toLowerCase=True, stripTrailingPunctuation=True, punctuation='|&<>\“”"_=:!.,()?…\/{}][;:', removeEmptyEntries=True, removeShortEntries=True, minEntryLength=2, removeLongEntries=False, maxEntryLength=20, removeTheAAn=True, stemming=True, stripTrailingNumbers=True, removeNonAsciiWords=True, ): if verbose: print('preprocess: started with', len(strings), 'strings') if toLowerCase: strings = [s.lower() for s in strings] if removeNumericStrings: strings = [ s for s in strings if Utility.getNumericContent(s) < stringsRemovalThreshold ] if verbose: print('preprocess: removeNumbers:', len(strings), 'strings') if removeLinks: strings = [ s for s in strings if re.match( 'https?://(?:[-\*w.]|(?:%[\da-fA-F]{2}))+', s) == None ] if verbose: print('preprocess: removeLinks:', len(strings), 'strings') if removeLongEntries: strings = [s for s in strings if len(s) <= maxEntryLength] if verbose: print('preprocess: removeLongEntries:', len(strings), 'strings') if removeTheAAn: theAAn = ['the', 'a', 'an'] strings = [s for s in strings if s not in theAAn] if verbose: print('preprocess: removeTheAAn:', len(strings), 'strings') if removeNonAsciiWords: strings = [s for s in strings if Utility.isAscii(s)] if verbose: print('preprocess: removeNonAsciiWords:', len(strings), 'strings') if stripTrailingPunctuation: for i, s in enumerate(strings): for p in punctuation: strings[i] = s.rstrip(p).lstrip(p) if stripTrailingNumbers: numbers = '1234567890' for i, s in enumerate(strings): for p in numbers: strings[i] = s.rstrip(p).lstrip(p) for i in range(len(strings)): for p in punctuation: strings[i] = strings[i].rstrip(p).lstrip(p) newstrings = [] for i in range(len(strings)): s = strings[i] for p in punctuation: s = s.rstrip(p).lstrip(p) newstrings.append(s) strings = newstrings if removeEmptyEntries: strings = [s for s in strings if s != None] if verbose: print('preprocess: removeEmptyEntries:', len(strings), 'strings') if removeShortEntries: strings = [s for s in strings if len(s) >= minEntryLength] if verbose: print('preprocess: removeShortEntries:', len(strings), 'strings') if stemming: strings = [stem(s) for s in strings] return strings
graph = {
    '1': ['2', '3', '4'],
    '2': ['5', '6'],
    '5': ['9', '10'],
    '4': ['7', '8'],
    '7': ['11', '12'],
}

startState = startStateHard
# startState = startStateEasy
# startState = startStateMedium
# startState = startStateHard

utility = Utility()
blankIndex = utility.getBlankIndex(startState)
stringRepOfStart = utility.getStringRep(startState)
stringRepOfGoal = utility.getStringRep(goalState)
startNode = BoardNode(startState, 'NULL', 'NULL', blankIndex, 1, stringRepOfStart)
goalNode = BoardNode(goalState, "NULL", "NULL", (1, 1), -1, stringRepOfGoal)

bfsSearch = BreadthFirst()
dfsSearch = DepthFirst()
dfsLimited = DepthLimited()
incDFSLimited = IncrementalDepthLimited()
bestFirstSearch = BestFirst()
astar = A_Star()

start_time = time.time()
astar.aStarSearch(startNode, goalNode, H1)