def main():
    """Convert the raw input file into the sanitized CSV file.

    Loads the TLD data with ``loadTlds``, hands the CSV writer to
    ``appendHeader``, then runs every line of ``rawFile`` through
    ``processLine`` and writes the returned fields as one CSV row to
    ``sanitizedFile``.
    """
    tlds = loadTlds()
    # Context managers guarantee that both files are closed (and the output
    # flushed) even when processLine or the writer raises part-way through.
    with open(rawFile) as inFile, open(sanitizedFile, "w") as outFile:
        writer = csv.writer(outFile)
        appendHeader(writer)
        for line in inFile:
            writer.writerow(processLine(line, tlds))
from nupic.frameworks.opf.modelfactory import ModelFactory
from nupic.frameworks.opf.predictionmetricsmanager import MetricsManager
from urls import loadTlds
from urls import parseUrl
import datetime
import texttable

scriptDir = os.path.dirname(os.path.realpath(__file__))
_LOGGER = logging.getLogger(__name__)
_DATA_PATH = os.path.join(scriptDir, "..", "data", "sanitized.csv")
_MODEL_PATH = os.path.join(scriptDir, "..", "savedModel", "checkpoint")

# The checkpointed model is loaded once at import time and learning is
# switched off, so URLs entered at the prompt do not modify the saved model.
model = ModelFactory.loadFromCheckpoint(_MODEL_PATH)
model.disableLearning()
tlds = loadTlds()


class UrlShell(cmd.Cmd):
    """Interactive shell that forwards each unrecognised input line to
    ``processOneUrl``."""

    intro = 'Enter URL to predict next hostname.\n'
    prompt = 'url> '
    file = None

    def default(self, line):
        """Handle a line that matches no ``do_*`` command by treating it
        as a URL."""
        processOneUrl(line)


def processOneUrl(url):
    """Stamp ``url`` with the current local time and parse it with
    ``parseUrl`` using the module-level ``tlds``."""
    # %S is the seconds field. Lowercase %s is a non-standard platform
    # extension (epoch seconds on glibc, unsupported elsewhere) and would
    # corrupt the timestamp.
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    urlFields = parseUrl(url, tlds)
    # Create dict: timestamp,tld,hostname,port,subdomain,path1,path2,path3,path4,path5,path6