def __init__(self, host, port, login=None, password=None, timeout=60, debug=False):
    """
    Initialize the scanner instance by setting up a connection and
    authenticating if credentials are provided.

    @type  host: string
    @param host: The hostname of the running Nessus server.
    @type  port: number
    @param port: The port number for the XMLRPC interface on the Nessus server.
    @type  login: string
    @param login: The username for logging in to Nessus.
    @type  password: string
    @param password: The password for logging in to Nessus.
    @type  timeout: number
    @param timeout: Connection timeout in seconds.
    @type  debug: bool
    @param debug: Turn on debugging.
    """
    self.token = None
    self.isadmin = None
    self.host = host
    self.port = port
    self.timeout = timeout
    self.debug = debug
    self.logger = get_logger('Scanner')
    self.connection = None
    self.headers = {"Content-type": "application/x-www-form-urlencoded",
                    "Accept": "text/plain"}
    self.username = login
    self.password = password
    self._connect()
    self.login()
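# Illustrative construction (host and credentials are hypothetical placeholders;
# _connect() and login() are methods of this class defined elsewhere):
#
#   scanner = Scanner("nessus.example.com", 8834,
#                     login="scanuser", password="changeme", timeout=30)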
def load(self, host, user, passwd, database, databaseType):
    self.host = host
    self.user = user
    self.passwd = passwd
    self.database = database
    self.databaseType = databaseType
    self.logger = get_logger("DatabaseOrchestrator")
    if databaseType == "MySQL":
        try:
            self.connectedDB = MySQLdb.connect(host, user, passwd, database)
        except MySQLdb.Error:
            self.logger.exception("MySQL - Login failure")
            raise DatabaseConnectError("MySQL - Login failure")
    elif databaseType == "PostgreSQL":
        try:
            self.connectedDB = psycopg2.connect(
                host=host, dbname=database, user=user, password=passwd)
        except psycopg2.Error:
            self.logger.exception("PostgreSQL - Login failure")
            raise DatabaseConnectError("PostgreSQL - Login failure")
    else:
        # No valid handling exists for the given databaseType.
        raise DatabaseTypeError(databaseType)
    self.cursor = self.connectedDB.cursor()
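# Illustrative use (class name, credentials, and query are hypothetical;
# DatabaseConnectError and DatabaseTypeError are assumed to be defined
# alongside this class):
#
#   db = DatabaseOrchestrator()
#   db.load("localhost", "appuser", "s3cret", "inventory", "MySQL")
#   db.cursor.execute("SELECT 1")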
def __init__(self):
    self.stdscr = curses.initscr()  # Initialize ncurses.
    self.logger = get_logger("NcursesHandler")
    curses.noecho()  # Disable automatic echoing of key presses (prevents each typed key from appearing twice).
    curses.cbreak()  # React to each key as it is pressed rather than waiting for the return key to be pressed.
    curses.start_color()  # Allow colors.
    self.stdscr.keypad(1)  # Capture keypad input to allow moving around the menus.
    self.stdscr.bkgd(' ', curses.color_pair(2))
    # Window for the top menu bar.
    self.stdscr2 = curses.newwin(3, 80, 0, 0)
    # Window for the help menu.
    self.stdscr3 = curses.newwin(21, 44, 7, 18)
    # Create color pairs.
    curses.init_pair(1, curses.COLOR_BLACK, curses.COLOR_YELLOW)
    curses.init_pair(2, curses.COLOR_WHITE, curses.COLOR_BLUE)
    curses.init_pair(3, curses.COLOR_BLACK, curses.COLOR_CYAN)
    curses.init_pair(4, curses.COLOR_RED, curses.COLOR_BLACK)
    curses.init_pair(5, curses.COLOR_BLUE, curses.COLOR_BLACK)
    curses.init_pair(6, curses.COLOR_CYAN, curses.COLOR_BLACK)
    curses.init_pair(7, curses.COLOR_BLACK, curses.COLOR_MAGENTA)
    curses.init_pair(8, curses.COLOR_BLACK, curses.COLOR_WHITE)
    curses.init_pair(9, curses.COLOR_BLACK, curses.COLOR_RED)
    curses.init_pair(10, curses.COLOR_BLACK, curses.COLOR_GREEN)
    curses.init_pair(11, curses.COLOR_CYAN, curses.COLOR_BLACK)
    curses.init_pair(12, curses.COLOR_WHITE, curses.COLOR_CYAN)
    curses.init_pair(13, curses.COLOR_YELLOW, curses.COLOR_BLUE)
    curses.init_pair(14, curses.COLOR_GREEN, curses.COLOR_BLACK)
    self.h = curses.color_pair(14)  # Coloring for a highlighted menu option.
    self.n = curses.A_NORMAL  # Coloring for a non-highlighted menu option.
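# Hedged companion sketch (not part of the original handler): curses leaves the
# terminal in cbreak/noecho mode unless its settings are undone, so a handler
# like the one above is conventionally paired with a teardown that mirrors
# __init__:
def teardown(self):
    self.stdscr.keypad(0)  # Stop interpreting keypad escape sequences.
    curses.nocbreak()      # Restore buffered (line-at-a-time) input.
    curses.echo()          # Re-enable key echoing.
    curses.endwin()        # Return the terminal to its previous state.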
import re
import string
import time
from typing import List, Any, Tuple

import numpy as np
import pandas as pd
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import WordPunctTokenizer
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer

from configs import AMAZON_REVIEWS_CSV
from Logger import get_logger

LOGGER = get_logger(__name__)
STOPWORDS = set(stopwords.words('english'))


def clean_text(line: str) -> str:
    # str.translate needs a translation table; passing string.punctuation
    # directly does not strip punctuation.
    line = line.translate(str.maketrans('', '', string.punctuation))
    line = line.lower().split()
    line = [word for word in line if word not in STOPWORDS and len(word) > 2]
    line = ' '.join(line)
    return re.sub(r"[^A-Za-z0-9^.,!\/'+-=]", " ", line)


def matrix_factorization(
        Ratings: pd.DataFrame,
        U: pd.DataFrame,
        P: pd.DataFrame,
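# Quick sanity check of clean_text (illustrative input, not from the dataset):
#
#   clean_text("This product is GREAT!!! I'd buy it again.")
#   # -> roughly "product great buy" after punctuation stripping,
#   #    stopword removal, and the len > 2 filter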
from os import path

from Logger import get_logger
from Constants import FILES, \
    SPECIFIC_PATH, \
    IGNORED_STRINGS, \
    FTP_ADDRESS, \
    FTP_CWD, \
    NASDAQ_FILE, \
    OTHER_FILE, \
    FTP_DELIMITER, \
    FILTERED_SYMBOLS, \
    EXCHANGES, \
    API_URL, \
    TICKERS_FOLDER

logger = get_logger()

DIRNAME, _ = path.split(path.abspath(__file__))
BASE_PATH = path.dirname(path.abspath(__file__))


def get_all_tickers_from_ftp(base=BASE_PATH, specific=SPECIFIC_PATH):
    _write_all_tickers_from_ftp(base, specific)
    all_tickers = _pull_all_tickers_from_file(base, specific)
    logger.info("Found {} tickers using ftp".format(len(all_tickers)))
    return _filter_all_tickers(all_tickers)


def _pull_all_tickers_from_file(base, specific):
    ticker_file = open(path.join(base, specific), 'r')
    all_tickers = []
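# Typical call (relies on the module-level defaults above;
# _write_all_tickers_from_ftp and _filter_all_tickers are defined elsewhere in
# this module):
#
#   tickers = get_all_tickers_from_ftp()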
def cotrain_wrapper(dataset='mldoc', source_language='english', target_language='spanish',
                    num_epochs=3, num_seeds=50, train_size=1000, sw_train_size=10000,
                    manual_seed=0, student_name='logreg', experiments_dir='./',
                    alignment_method='google_translate', tokenizer_method='spacy',
                    num_iter=2, cuda_device=0):
    now = datetime.now()
    basedatapath = '../data'

    # Define dataset-specific paths and metrics
    if dataset == 'mldoc':
        datapath = os.path.join(basedatapath, dataset)
        vocab_path = os.path.join(datapath, 'vocab/')
        seed_words_path = os.path.join(experiments_dir, '{}/seedwords/'.format(target_language))
        checkpoint_path = os.path.join(experiments_dir, target_language)
        metric = 'acc'
    elif 'cls' in dataset:
        dataset_name = dataset.split('_')[0]
        domain = dataset.split('_')[1]
        datapath = os.path.join(basedatapath, dataset_name)
        #vocab_path = os.path.join(datapath, 'vocab/')
        #datapath = os.path.join(home, 'data2/multilingual/{}/'.format(dataset_name))
        vocab_path = os.path.join(datapath, 'vocab/{}/'.format(domain))
        seed_words_path = os.path.join(
            experiments_dir, '{}_{}/seedwords/'.format(target_language, domain))
        checkpoint_path = os.path.join(experiments_dir, '{}_{}'.format(target_language, domain))
        metric = 'acc'
    elif dataset == 'twittersent':
        datapath = os.path.join(basedatapath, dataset)
        vocab_path = os.path.join(datapath, 'vocab/')
        seed_words_path = os.path.join(experiments_dir, '{}/seedwords/'.format(target_language))
        checkpoint_path = os.path.join(experiments_dir, target_language)
        metric = 'f1'
    elif dataset == 'lorelei':
        datapath = os.path.join(basedatapath, dataset)
        vocab_path = os.path.join(datapath, 'vocab/')
        seed_words_path = os.path.join(experiments_dir, '{}/seedwords/'.format(target_language))
        checkpoint_path = os.path.join(experiments_dir, target_language)
        metric = 'acc'
    else:
        raise ValueError('dataset not supported: {}'.format(dataset))

    if student_name == 'logreg' and not os.path.exists(vocab_path):
        print("Training tokenizer...")
        #tok_dataset = dataset if not 'cls' in dataset else dataset_name
        train_tokenizer(dataset, train_size=train_size, savepath=vocab_path)

    if alignment_method == 'google_translate':
        translation_dict_path = os.path.join(basedatapath, 'googletranslate_dict.pkl')
    elif alignment_method == 'muse_dict':
        # To use the MUSE dict, make sure you've run "python prepare_muse_dict.py"
        # under the "data" directory.
        translation_dict_path = os.path.join(basedatapath, 'muse_dict.pkl')
    else:
        raise NotImplementedError('translation method not implemented: {}'.format(alignment_method))

    # Define save paths
    print('Experiments dir: {}'.format(experiments_dir))
    os.makedirs(checkpoint_path, exist_ok=True)
    results_savefile = os.path.join(checkpoint_path, 'results.pkl')
    loggerfile = os.path.join(checkpoint_path, 'log.log')
    logger = get_logger(logfile=loggerfile)

    # Define model args
    # More arguments: https://github.com/ThilinaRajapakse/simpletransformers/blob/master/simpletransformers/config/global_args.py
    teacher_args = {'name': 'SeedCLF'}
    student_args = {
        'model_name': student_name,
        'reprocess_input_data': True,
        'overwrite_output_dir': True,
        'evaluate_during_training': True,
        'num_train_epochs': num_epochs,
        'output_dir': checkpoint_path,
        'fp16': False,
        "manual_seed": manual_seed,
        "no_cache": True,
        "use_cached_eval_features": False,
        "tensorboard_dir": os.path.join(experiments_dir, 'tensorboard/'),
        "cache_dir": os.path.join(experiments_dir, 'cache/'),
        "use_early_stopping": True,
        "early_stopping_patience": 3,
        "n_gpu": 1,
        "save_model_every_epoch": False,
        "cuda_device": cuda_device
    }

    # Run co-training
    res = cotrain(dataset=dataset, source_language=source_language,
                  target_language=target_language, train_size=train_size,
                  num_seeds=num_seeds, sw_train_size=sw_train_size,
                  translation_dict_path=translation_dict_path,
                  seed_words_path=seed_words_path, vocab_path=vocab_path,
                  student_args=student_args, teacher_args=teacher_args,
                  manual_seed=manual_seed, logger=logger,
                  alignment_method=alignment_method,
                  tokenizer_method=tokenizer_method, num_iter=num_iter,
                  metric=metric)

    # Save results
    print("Finished experiments: {}".format(datetime.now() - now))
    joblib.dump(res, results_savefile)
    print("Saved results at {}".format(results_savefile))
    close(logger)
    return res
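# Illustrative invocation (all values hypothetical; dataset names come from the
# branches above, and paths follow the defaults):
#
#   res = cotrain_wrapper(dataset='mldoc', source_language='english',
#                         target_language='german', num_iter=2,
#                         experiments_dir='./experiments/')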
def __init__(self, configfile, scans, debug=False, timeout=None):
    """
    @type  configfile: string
    @param configfile: Full path to a configuration file for loading defaults
    @type  scans: list
    @param scans: A list() of scans assembled with all necessary context
    """
    self.scans_running = []   # Scans currently running.
    self.scans_complete = []  # Scans that have completed.
    self.scans = scans        # Scans that remain to be started.
    self.started = False      # Flag for telling when scanning has started.
    self.debugging = debug    # Flag for turning on low-level debugging.
    self.timeout = timeout    # Max time to try to connect to the host, in seconds, or 0 if disabled.

    # Parse the configuration file to set everything up.
    self.config = ConfigParser.ConfigParser()
    self.config.readfp(open(configfile))

    loglevels = {'debug': logging.DEBUG,
                 'info': logging.INFO,
                 'warning': logging.WARNING,
                 'error': logging.ERROR,
                 'critical': logging.CRITICAL}

    # Core settings
    self.logfile = self.config.get('core', 'logfile')
    if self.debugging is True:
        self.loglevel = logging.DEBUG
    else:
        self.loglevel = loglevels[self.config.get('core', 'loglevel')]

    # Set up some basic logging.
    setup_logger(self.logfile, self.loglevel, debug=debug)
    self.logger = get_logger('Nessus')
    self.debug("CONF configfile = %s" % configfile)
    self.debug("Logger initiated; Logfile: %s, Loglevel: %s" % (self.logfile, self.loglevel))

    self.server = self.config.get('core', 'server')
    self.debug("CONF core.server = %s" % self.server)
    self.port = self.config.getint('core', 'port')
    self.debug("CONF core.port = %s" % self.port)
    self.user = self.config.get('core', 'user')
    self.debug("CONF core.user = %s" % self.user)
    self.password = self.config.get('core', 'password')
    self.debug("CONF core.password set")
    self.limit = self.config.getint('core', 'limit')
    self.debug("CONF core.limit = %d" % self.limit)
    self.sleepmax = self.config.getint('core', 'sleepmax')
    self.debug("CONF core.sleepmax = %d" % self.sleepmax)
    self.sleepmin = self.config.getint('core', 'sleepmin')
    self.debug("CONF core.sleepmin = %d" % self.sleepmin)

    if self.config.has_option('core', 'timeout'):
        # The command-line argument takes precedence over the config file.
        if self.timeout is not None and self.timeout == default_timeout:
            self.timeout = self.config.getint('core', 'timeout')
    if self.debugging is not True and self.config.has_option('core', 'debug'):
        self.debugging = self.config.getboolean('core', 'debug')
    self.debug("CONF core.debug = %s" % self.debugging)

    # SMTP settings
    self.emailto = self.config.get('smtp', 'to')
    self.debug("CONF smtp.emailto = %s" % self.emailto)
    self.emailfrom = self.config.get('smtp', 'from')
    self.debug("CONF smtp.emailfrom = %s" % self.emailfrom)
    self.smtpserver = self.config.get('smtp', 'server')
    self.debug("CONF smtp.smtpserver = %s" % self.smtpserver)
    self.smtpport = self.config.getint('smtp', 'port')
    self.debug("CONF smtp.smtpport = %d" % self.smtpport)

    # Reporting settings
    self.reports = self.config.get('report', 'outputdir')
    self.debug("CONF report.reports = %s" % self.reports)
    self.xsltproc = self.config.get('report', 'xsltproc')
    self.debug("CONF report.xsltproc = %s" % self.xsltproc)
    self.xsltlog = self.config.get('report', 'xsltlog')
    self.debug("CONF report.xsltlog = %s" % self.xsltlog)
    self.xsl = self.config.get('report', 'xsl')
    self.debug("CONF report.xsl = %s" % self.xsl)

    self.debug("PARSED scans: %s" % self.scans)

    try:
        self.info("Nessus scanner started.")
        self.scanner = Scanner(self.server, self.port, self.user, self.password,
                               timeout=self.timeout, debug=self.debugging)
        self.info("Connected to Nessus server; authenticated to server '%s' as user '%s'"
                  % (self.server, self.user))
    except socket.error as (errno, strerror):
        self.error("Socket error encountered while connecting to Nessus server: %s. "
                   "User: '%s', Server: '%s', Port: %s"
                   % (strerror, self.user, self.server, self.port))
        sys.exit(1)
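# A minimal configuration sketch covering every option the constructor above
# reads (all values are placeholders, not project defaults):
#
#   [core]
#   logfile = /var/log/nessus-ctl.log
#   loglevel = info
#   server = nessus.example.com
#   port = 8834
#   user = scanuser
#   password = changeme
#   limit = 4
#   sleepmax = 120
#   sleepmin = 30
#   timeout = 300
#   debug = false
#
#   [smtp]
#   to = ops@example.com
#   from = nessus@example.com
#   server = smtp.example.com
#   port = 25
#
#   [report]
#   outputdir = /var/nessus/reports
#   xsltproc = /usr/bin/xsltproc
#   xsltlog = /var/log/xslt.log
#   xsl = /usr/share/nessus/report.xsl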
from datetime import datetime

from fantasy_pros_scraper import scrape
from CSVPlayerGather import get_all_games, get_all_players, get_all_projections
from NFL_Player_Holder import NFLPlayerHolder
from NFL_Lineup_Generator import NFLLineupGenerator
from NFL.GeneticAlgorithm import GeneticAlgorithm
from Logger import get_logger

logger = get_logger()

DK_SALARY_CSV = "DKSalaries.txt"
PROJECTIONS_CSV = "fan-pros-projections.csv"


def run(fitness_formula, week=None, iterations=9, generations=500):
    scrape(week)
    start_time = datetime.now()
    logger.info("Beginning Algorithm")
    all_players = get_all_players(DK_SALARY_CSV)
    all_games = get_all_games(DK_SALARY_CSV)
    player_holder = NFLPlayerHolder(all_players, all_games)
    player_holder.update_projections(get_all_projections(PROJECTIONS_CSV))
    player_holder.remove_non_projected_players()
    lineup_gen = NFLLineupGenerator(player_holder)
    g = GeneticAlgorithm(lineup_gen, fitness_formula)
    # best_lineup = None
    # for x in range(0, iterations):
    #     logger.info("Generation {}".format(x + 1))
    #     if not best_lineup:
    #         best_lineup = g.run(generations)
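# Hypothetical entry point (my_fitness_formula stands in for whatever scoring
# callable GeneticAlgorithm expects; week defaults to the scraper's current
# week):
#
#   run(my_fitness_formula, week=5, generations=500)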
def main():
    parser = argparse.ArgumentParser()

    # Main Arguments
    parser.add_argument("--dataset", help="Dataset name", type=str, default='youtube')
    parser.add_argument("--datapath", help="Path to base dataset folder", type=str, default='../data')
    parser.add_argument("--student_name", help="Student short name", type=str, default='bert')
    parser.add_argument("--teacher_name", help="Teacher short name", type=str, default='ran')

    # Extra Arguments
    parser.add_argument("--experiment_folder", help="Base experiments folder", type=str, default='../experiments/')
    parser.add_argument("--logdir", help="Experiment log directory", type=str, default='./')
    parser.add_argument("--metric", help="Evaluation metric", type=str, default='acc')
    parser.add_argument("--num_iter", help="Number of self/co-training iterations", type=int, default=25)
    parser.add_argument("--num_supervised_trials", nargs="?", type=int, default=5,
                        help="number of different trials to start self-training with")
    parser.add_argument('-ws', '--weak_sources', help="List of weak source names for Teacher", nargs='+')
    parser.add_argument("--downsample", help="Downsample labeled train & dev datasets, randomly stratified by label",
                        type=float, default=1.0)
    parser.add_argument("--oversample", help="Oversample labeled train datasets", type=int, default=1)
    parser.add_argument("--tokenizer_method", help="Tokenizer method (for LogReg student)", type=str, default='clean')
    parser.add_argument("--num_epochs", default=70, type=int,
                        help="Total number of training epochs for student.")
    parser.add_argument("--num_unsup_epochs", default=25, type=int,
                        help="Total number of training epochs for training student on unlabeled data")
    parser.add_argument("--debug", action="store_true", help="Activate debug mode")
    parser.add_argument("--soft_labels", action="store_true", help="Use soft labels for training Student")
    parser.add_argument("--loss_weights", action="store_true",
                        help="Use instance weights in loss function according to Teacher's confidence")
    parser.add_argument("--convert_abstain_to_random", action="store_true",
                        help="In Teacher, if rules abstain on dev/test then flip a coin")
    parser.add_argument("--hard_student_rule", action="store_true",
                        help="When using Student as a rule in Teacher, use hard (instead of soft) student labels")
    parser.add_argument("--train_batch_size", help="Train batch size", type=int, default=16)
    parser.add_argument("--eval_batch_size", help="Dev batch size", type=int, default=128)
    parser.add_argument("--unsup_batch_size", help="Unsupervised batch size", type=int, default=128)
    parser.add_argument("--max_size", help="Max size of unlabeled data for training the student if balance_maxsize==True",
                        type=int, default=1000)
    parser.add_argument("--max_seq_length", help="Maximum sequence length (student)", type=int, default=64)
    parser.add_argument("--max_rule_seq_length",
                        help="Maximum sequence length of rule predictions (i.e., max # rules that can cover a single instance)",
                        type=int, default=10)
    parser.add_argument("--no_cuda", action="store_true", help="Do not use CUDA")
    parser.add_argument("--lower_case", action="store_true", help="Use uncased model")
    parser.add_argument("--overwrite", action="store_true", help="Overwrite dataset if exists")
    parser.add_argument("--weight_decay", default=0.0, type=float, help="Weight decay for student")
    parser.add_argument("--learning_rate", default=1e-3, type=float, help="The initial learning rate for Adam.")
    parser.add_argument("--finetuning_rate", default=1e-5, type=float,
                        help="The initial learning rate for Adam during fine-tuning.")
parser.add_argument("--adam_epsilon", default=1e-8, type=float, help="Epsilon for Adam optimizer.") parser.add_argument("--max_grad_norm", default=1.0, type=float, help="Max gradient norm.") parser.add_argument("--warmup_steps", default=0, type=int, help="Linear warmup over warmup_steps.") parser.add_argument("--logging_steps", type=int, default=500, help="Log every X updates steps.") parser.add_argument("--fp16", action='store_true', help='whehter use fp16 or not') parser.add_argument("--sample_size", nargs="?", type=int, default=16384, help="number of unlabeled samples for evaluating uncetainty on in each self-training iteration") parser.add_argument("--seed", type=int, default=42, help="random seed for initialization") args = parser.parse_args() np.random.seed(args.seed) # Define dataset-specific parameters if args.dataset in ['sms']: args.num_labels = 2 args.metric = 'weighted_f1' args.oversample = 3 elif args.dataset in ['youtube']: args.num_labels = 2 args.metric = 'weighted_acc' args.oversample = 3 elif args.dataset == 'trec': args.num_labels = 6 args.metric = 'weighted_acc' args.oversample = 10 elif args.dataset == 'census': args.num_labels = 2 args.metric = 'weighted_acc' args.train_batch_size = 128 args.oversample = 5 # CENSUS is the only dataset where more than 10 rules may cover a single instance args.max_rule_seq_length = 15 elif args.dataset == 'mitr': args.num_labels = 9 args.metric = 'weighted_f1' args.oversample = 2 args.max_seq_length = 32 args.train_batch_size = 256 args.unsup_batch_size = 128 elif args.dataset in ['spouse']: args.num_labels = 2 args.metric = 'f1' args.max_seq_length = 32 args.train_batch_size = 256 else: raise(BaseException('unknown dataset: {}'.format(args.dataset))) # Start Experiment now = datetime.now() date_time = now.strftime("%Y_%m_%d-%H_%M") args.experiment_folder = os.path.join(args.experiment_folder, args.dataset) args.logdir = os.path.join(args.experiment_folder, args.logdir) if args.debug: args.logdir = os.path.join(args.experiment_folder, 'debug') if os.path.exists(args.logdir): shutil.rmtree(args.logdir) else: args.logdir = args.logdir + "/" + date_time if args.student_name == 'logreg': args.logdir += "_st{}".format(args.student_name.upper()) else: args.logdir += "_{}".format(args.dataset) + \ "_st{}".format(args.student_name.upper()) + \ "_epoch{}".format(args.num_epochs) + \ "_lr{}".format(args.learning_rate) + \ "_batch{}".format(args.train_batch_size) + \ "_maxseq{}".format(args.max_seq_length) if int(args.downsample) != 1: args.logdir += "_downsample{}".format(args.downsample) os.makedirs(args.logdir, exist_ok=True) logger = get_logger(logfile=os.path.join(args.logdir, 'log.log')) # Setup CUDA, GPU & distributed training if args.no_cuda: device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu") args.n_gpu = torch.cuda.device_count() else: # Initializes the distributed backend which will take care of sychronizing nodes/GPUs device = torch.device("cuda") args.n_gpu = 1 args.device = device args.train_batch_size = args.train_batch_size * max(1, args.n_gpu) args.eval_batch_size = args.train_batch_size * max(1, args.n_gpu) logger.info("\n\n\t\t *** NEW EXPERIMENT ***\nargs={}".format(args)) astra(args, logger=logger) close(logger)