def test_dict_normalize_df():
    # NOTE: This test is a bit circuitous because storing a target df requires
    # storing some source from which to generate that df; in this case, we
    # are using test_utils_csv_to_df.csv, which represents the desired csv
    # representation of the df after successful execution of the function.
    # However, to get our test df into a 1:1 comparable state with the target df
    # generated from this csv, we first have to convert the test df to csv
    # and then convert it back to a df; an annoying nuance, but necessary for
    # a true apples-to-apples comparison between dfs generated this way.
    utils = Utils()
    INDEX = "ISIN"
    ARGUMENT = "articleList"

    with open("./utils/tests/test_utils_dict_to_df.txt") as dict_file:
        test_dict = json.load(dict_file)

    target_df = pd.read_csv("./utils/tests/test_utils_csv_to_df.csv", keep_default_na=False)

    temp_df = utils.dict_normalize_df(
        test_dict,
        INDEX,
        ARGUMENT,
    )
    temp_df.to_csv(
        "./utils/tests/test_utils_df_to_csv.csv",
        sep=",",
        index=None,
        header=True,
    )
    test_df = pd.read_csv("./utils/tests/test_utils_df_to_csv.csv", keep_default_na=False)

    assert test_df.equals(target_df)
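Several of these tests repeat the same csv round trip before comparing DataFrames. As a sketch only (the helper name roundtrip_equals and the temp-file argument are assumptions, not part of the tested Utils class), the comparison step could be factored out like this:

import pandas as pd


def roundtrip_equals(df, target_csv_path, tmp_csv_path):
    # Hypothetical helper: write df to csv and read it back so that dtype and
    # NA handling match a target df that was itself loaded from csv.
    df.to_csv(tmp_csv_path, sep=",", index=None, header=True)
    test_df = pd.read_csv(tmp_csv_path, keep_default_na=False)
    target_df = pd.read_csv(target_csv_path, keep_default_na=False)
    return test_df.equals(target_df)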
def bamtools(self):
    i = self.__path('bamtools')
    cmd = "git clone {url} {d}".format(url=i.url, d=i.build_dir)
    Utils.run(cmd)
    i = self.__path('bamtools')
    cmd = "mkdir -p build && cd build && CC=gcc-4.8 CXX=g++-4.8 cmake -DCMAKE_INSTALL_PREFIX:PATH={local_dir} .. && make -j {num_cores} install".format(
        local_dir=shellquote(i.local_dir), num_cores=self.num_cores())
    Utils.run_in_dir(cmd, i.build_dir)
def test_text_parse_dict():
    utils = Utils()
    test_filepath = "./utils/tests/test_utils_creds.txt"
    test_dict = utils.text_parse_dict(filepath=test_filepath, separator="=")
    target_dict = {
        "USERNAME": "******",
        "PASSWORD": "******",
        "APPID": "MyTestApplication",
    }
    assert test_dict == target_dict
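The test above constrains the expected behaviour of text_parse_dict: each line of the credentials file holds a KEY=VALUE pair that becomes a dictionary entry. A minimal sketch under that assumption (the real Utils.text_parse_dict may handle more cases, such as comments or quoting):

def text_parse_dict(filepath, separator="="):
    # Sketch only: split each non-empty line on the first separator
    # and collect the key/value pairs into a dict.
    parsed = {}
    with open(filepath) as handle:
        for line in handle:
            line = line.strip()
            if not line:
                continue
            key, _, value = line.partition(separator)
            parsed[key.strip()] = value.strip()
    return parsed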
def __init__(self):
    self.base_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "external"))
    self.ext_tars = os.path.join(self.base_dir, "tarballs")
    self.ext_build = os.path.join(self.base_dir, "build")
    self.install_dir = os.path.join(self.base_dir, "local")
    Utils.mkdir(self.ext_tars)
    Utils.mkdir(self.ext_build)
    self.paths = {}
    self.paths["zi_lib"] = self.__zi_lib()
    self.paths["cppitertools"] = self.__cppitertools()
    self.paths["boost"] = self.__boost()
def test_uuid_generator():
    utils = Utils()
    test_seedvar = 6
    test_bitcount = 128
    test_refid = "4295905573"  # Apple Inc (Organization) RefID
    target_uuid_final = "14fe8b5a6ec8d00eddbfb6bd60c2e82e13a59db3"
    test_uuid_final = utils.uuid_generator(
        seedvar=test_seedvar,
        bitcount=test_bitcount,
        refstring=test_refid,
    )
    assert test_uuid_final == target_uuid_final
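The expected value is a 40-character hex string, which is consistent with a SHA-1 digest. One plausible shape for such a generator, shown purely as an assumption since the test does not reveal the actual implementation of Utils.uuid_generator: seed the PRNG with seedvar, draw bitcount random bits, combine them with refstring, and hash the result.

import hashlib
import random


def uuid_generator(seedvar, bitcount, refstring):
    # Assumed sketch, not the tested implementation: deterministic because
    # the PRNG is seeded, so the same inputs always yield the same digest.
    random.seed(seedvar)
    random_bits = random.getrandbits(bitcount)
    return hashlib.sha1(f"{random_bits}{refstring}".encode()).hexdigest()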
def __init__(self):
    self.base_dir = os.path.abspath(
        os.path.join(os.path.dirname(__file__), "external"))
    self.ext_tars = os.path.join(self.base_dir, "tarballs")
    self.ext_build = os.path.join(self.base_dir, "build")
    self.install_dir = os.path.join(self.base_dir, "local")
    Utils.mkdir(self.ext_tars)
    Utils.mkdir(self.ext_build)
    self.paths = {}
    self.paths["zi_lib"] = self.__zi_lib()
    self.paths["cppitertools"] = self.__cppitertools()
    self.paths["boost"] = self.__boost()
def __build(self, i, cmd):
    print "\t getting file..."
    fnp = Utils.get_file_if_size_diff(i.url, self.paths.ext_tars)
    Utils.clear_dir(i.build_dir)
    Utils.untar(fnp, i.build_dir)
    try:
        Utils.run_in_dir(cmd, i.build_sub_dir)
    except:
        Utils.rm_rf(i.local_dir)
        sys.exit(1)
def __processArgs(self):
    dirs = self.args.dirsToDelete
    if not dirs:
        return
    if "all" == dirs[0]:
        dirs = []
        for k, _ in self.paths.paths.iteritems():
            dirs.append(k)
    for e in dirs:
        p = self.__path(e)
        if p.build_dir:
            Utils.rm_rf(p.build_dir)
        if p.local_dir:
            Utils.rm_rf(p.local_dir)
def num_cores(self):
    c = Utils.num_cores()
    if c > 8:
        return 8
    if 1 == c:
        return 1
    return c - 1
def bamtools(self):
    i = self.__path('bamtools')
    cmd = "git clone {url} {d}".format(url=i.url, d=i.build_dir)
    Utils.run(cmd)
    i = self.__path('bamtools')
    #cmd = "mkdir -p build && cd build && CC=gcc-4.8 CXX=g++-4.8 cmake -DCMAKE_INSTALL_PREFIX:PATH={local_dir} .. && make -j {num_cores} install".format(
    #    local_dir=shellquote(i.local_dir), num_cores=self.num_cores())
    if(sys.platform == "darwin"):
        cmd = "mkdir -p build && cd build && CC=clang CXX=clang++ cmake -DCMAKE_INSTALL_PREFIX:PATH={local_dir} .. && make -j {num_cores} install".format(
            local_dir=shellquote(i.local_dir), num_cores=self.num_cores())
    else:
        if self.args.clang:
            cmd = "mkdir -p build && cd build && CC=clang CXX=clang++ cmake -DCMAKE_INSTALL_PREFIX:PATH={local_dir} .. && make -j {num_cores} install".format(
                local_dir=shellquote(i.local_dir), num_cores=self.num_cores())
        else:
            cmd = "mkdir -p build && cd build && CC=gcc-4.8 CXX=g++-4.8 cmake -DCMAKE_INSTALL_PREFIX:PATH={local_dir} .. && make -j {num_cores} install".format(
                local_dir=shellquote(i.local_dir), num_cores=self.num_cores())
    Utils.run_in_dir(cmd, i.build_dir)
def returnresult(actionid, result):
    options = {
        "SN": "00010001",
        "CMD": "actionresult",
        "actionID": actionid,
        "result": result
    }
    print(options)
    print(Utils().http_post("/north/", options))
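Utils().http_post("/north/", options) is the project's own wrapper, so its internals are not shown here. A minimal sketch of what such a wrapper could look like on top of the requests library; the base URL, JSON handling, and timeout are assumptions, not the real implementation:

import requests


class Utils:
    BASE_URL = "http://localhost:8080"  # assumed; the real base URL is configured elsewhere

    def http_post(self, path, payload):
        # Sketch only: POST the payload as JSON and return the decoded response body.
        response = requests.post(self.BASE_URL + path, json=payload, timeout=10)
        response.raise_for_status()
        return response.json()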
def __processArgs(self):
    dirs = self.args.dirsToDelete
    if not dirs:
        return
    if "all" == dirs[0]:
        dirs = []
        for k, _ in self.paths.paths.iteritems():
            dirs.append(k)
    if "shark" in dirs and not "boost149" in dirs:
        dirs.append("boost149")
    for e in dirs:
        p = self.__path(e)
        if p.build_dir:
            Utils.rm_rf(p.build_dir)
        if p.local_dir:
            Utils.rm_rf(p.local_dir)
def __init__(self):
    self.base_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "external"))
    self.ext_tars = os.path.join(self.base_dir, "tarballs")
    self.ext_build = os.path.join(self.base_dir, "build")
    self.install_dir = os.path.join(self.base_dir, "local")
    Utils.mkdir(self.ext_tars)
    Utils.mkdir(self.ext_build)
    self.paths = {}
    self.paths["zi_lib"] = self.__zi_lib()
    self.paths["cppitertools"] = self.__cppitertools()
    self.paths["cppprogutils"] = self.__cppprogutils()
    self.paths["boost"] = self.__boost()
    self.paths["R-devel"] = self.__Rdevel()
    self.paths["cppcms"] = self.__cppcms()
    self.paths["bamtools"] = self.__bamtools()
    self.paths["pear"] = self.__pear()
    self.paths["mathgl"] = self.__mathgl()
    self.paths["armadillo"] = self.__armadillo()
    self.paths["mlpack"] = self.__mlpack()
    self.paths["liblinear"] = self.__liblinear()
def main():
    args = parse_args()
    s = Setup(args)
    if args.print_libs:
        print "Available installs:"
        count = 1
        installs = s.allSetUps
        installs.sort()
        for set in installs:
            print count, ")", set
            count = count + 1
    elif args.addBashCompletion:
        cmd = "cat bashCompletes/* >> ~/.bash_completion"
        Utils.run(cmd)
        if args.bib_cpp:
            if args.dev:
                cmd = "echo \"complete -F _bibCppTools proto\" >> ~/.bash_completion"
                Utils.run(cmd)
            cmd = "echo \"complete -F _bibCppTools bioalg\" >> ~/.bash_completion"
            Utils.run(cmd)
            cmd = "echo \"complete -F _bibCppTools euler\" >> ~/.bash_completion"
            Utils.run(cmd)
    else:
        s.setup()
def cppitertools(self):
    self.__git(self.__path('cppitertools'))
    i = self.__path('cppitertools')
    cmd = "cd {d} && git checkout d4f79321842dd584f799a7d51d3e066a2cdb7cac".format(d=shellquote(i.local_dir))
    Utils.run(cmd)
def __git(self, i):
    cmd = "git clone {url} {d}".format(url=i.url, d=shellquote(i.local_dir))
    Utils.run(cmd)
df.info()
df.shape

Creating a list variable called **col_remove**, to which the features that are not relevant to our goal will be added.

col_remove = ['id']

## Analysis of Missing Values

df.isna().sum()

Loading the Utils class, which helps to visualize the data.

utils = Utils()
utils.plot_variables_nan(df)
utils.df_nan

Removing the **riesgo** variable since more than 99% of its values are NaN.

df = df.drop(columns=['riesgo'])
df.shape

## Analysis of target value

df[['client']].hist()
plt.ylabel('Count')
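plot_variables_nan and df_nan come from the notebook's own Utils class, whose code is not shown here. A minimal sketch of what such a helper might do, assuming it stores the per-column NaN percentage in df_nan and plots it as a bar chart; the names follow the calls above and the actual implementation may differ:

import matplotlib.pyplot as plt


class Utils:
    def plot_variables_nan(self, df):
        # Sketch only: percentage of missing values per column, largest first.
        self.df_nan = (df.isna().mean() * 100).sort_values(ascending=False)
        self.df_nan.plot(kind='bar')
        plt.ylabel('% of NaN values')
        plt.show()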
def train(self):
    # initializing some loss functions that will be used
    criterion = nn.MSELoss()
    l2_loss = nn.MSELoss()
    l1_loss = nn.L1Loss()

    print('Training...')
    for epoch in range(self.num_epochs):
        for sample in self.data_loader:
            # getting each key's value from the sample (each sample is a dictionary)
            right_images = sample['face']
            onehot = sample['onehot']
            raw_wav = sample['audio']
            wrong_images = sample['wrong_face']
            # list with the index of the youtuber to whom the audio belongs
            id_labels = from_onehot_to_int(onehot)

            # defining the inputs as Variables and allocating them on the GPU
            right_images = Variable(right_images.float()).cuda()
            raw_wav = Variable(raw_wav.float()).cuda()
            wrong_images = Variable(wrong_images.float()).cuda()
            onehot = Variable(onehot.float()).cuda()
            id_labels = Variable(id_labels).cuda()

            # tensors of ones and zeros (one per sample in the batch) used to compute the D loss
            real_labels = torch.ones(right_images.size(0))
            fake_labels = torch.zeros(right_images.size(0))

            # ======== One-sided label smoothing ==========
            # Helps prevent the discriminator from overpowering the generator
            # by adding a penalty when the discriminator is too confident
            # =============================================
            smoothed_real_labels = torch.FloatTensor(
                Utils.smooth_label(real_labels.numpy(), -0.1))  # smoothed_real_labels will now be 0.9

            # allocating the three variables on the GPU
            real_labels = Variable(real_labels).cuda()
            smoothed_real_labels = Variable(smoothed_real_labels).cuda()
            fake_labels = Variable(fake_labels).cuda()

            # ======= #
            # TRAIN D #
            # ======= #

            # setting all the gradients to 0
            self.discriminator.zero_grad()

            # feeding G only with the wav file
            fake_images, z_vector, _ = self.generator(raw_wav)
            # feeding D with the generated images and the z vector, whose dimensions
            # are needed for the concatenation in the last hidden layer
            outputs, _ = self.discriminator(fake_images, z_vector)
            # computing the D loss when feeding fake images
            fake_score = outputs  # log file purposes
            fake_loss = criterion(outputs, fake_labels)

            # feeding D with the real images and the z vector again
            outputs, activation_real = self.discriminator(right_images, z_vector)
            # computing the D loss when feeding real images
            real_score = outputs
            real_loss = criterion(outputs, smoothed_real_labels)

            # feeding D with real images that do not correspond to the wav under training
            outputs, _ = self.discriminator(wrong_images, z_vector)
            # computing the D loss when feeding real images that do not match the input audio
            wrong_loss = criterion(outputs, fake_labels)
            wrong_score = outputs

            # the discriminator loss is the sum of the three of them
            d_loss = real_loss + fake_loss + wrong_loss
            d_loss.backward()
            self.optimD.step()

            # ======= #
            # TRAIN G #
            # ======= #

            # setting all the gradients to 0
            self.generator.zero_grad()

            # feeding G only with the wav file
            fake_images, z_vector, softmax_scores = self.generator(raw_wav)
            # feeding D with the generated images and the z vector; storing intermediate
            # layer activations for loss computation purposes
            outputs, activation_fake = self.discriminator(fake_images, z_vector)
            # feeding D with the real images and the z vector; storing intermediate
            # layer activations for loss computation purposes
            _, activation_real = self.discriminator(right_images, z_vector)

            activation_fake = torch.mean(activation_fake, 0)
            activation_real = torch.mean(activation_real, 0)

            # ======= Generator loss function ============
            # This is a customized loss function: the first term is the mean squared error loss.
            # The second term is the feature-matching loss, which measures the distance between
            # real and generated image statistics by comparing intermediate layer activations.
            # The third term is the L1 distance between the generated and real images, which is
            # helpful for the conditional case because it links the embedding feature vector
            # directly to certain pixel values.
            # ============================================

            # computing first the part of the loss related to the softmax classifier after the embedding
            softmax_criterion = nn.CrossEntropyLoss()
            softmax_loss = softmax_criterion(softmax_scores, id_labels)

            g_loss = criterion(outputs, real_labels) \
                + self.l2_coef * l2_loss(activation_fake, activation_real.detach()) \
                + self.l1_coef * l1_loss(fake_images, right_images) \
                + self.softmax_coef * softmax_loss
            # we have seen softmax_loss start around 2 and g_loss around 20; that's why we've scaled by 10

            # applying backpropagation and updating the parameters
            g_loss.backward()
            self.optimG.step()

        # store the info in the logger at each epoch
        self.logger.log_iteration_gan(epoch, d_loss, g_loss, real_score,
                                      fake_score, wrong_score)

        # storing the parameters every 10 epochs
        if (epoch) % 10 == 0:
            Utils.save_checkpoint(self.discriminator, self.generator,
                                  self.checkpoints_path, self.save_path, epoch)
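Utils.smooth_label is only shown being called above; the surrounding comments (real labels of 1.0 becoming 0.9 after an offset of -0.1) pin down its behaviour well enough for a sketch. A minimal standalone version under that assumption (the project's actual helper lives on its Utils class and may differ):

import numpy as np


def smooth_label(labels, offset):
    # Sketch based on the call above: shift every label by `offset`,
    # e.g. real labels of 1.0 become 0.9 when offset is -0.1.
    return np.asarray(labels) + offset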
def test_text_parse_list():
    utils = Utils()
    test_filepath = "./utils/tests/test_utils_entities.txt"
    test_list = utils.text_parse_list(filepath=test_filepath)
    target_list = ["ABCD", "1234", "!@#$"]
    assert test_list == target_list
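As with text_parse_dict, the test constrains text_parse_list to return one string per line of the input file. A minimal sketch under that assumption (the real helper may also handle separators or comments):

def text_parse_list(filepath):
    # Sketch only: one entry per non-empty line, whitespace stripped.
    with open(filepath) as handle:
        return [line.strip() for line in handle if line.strip()]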
from scripts.BsHelper import BsHelper
from scripts.BsParser import BsParser
from scripts.utils import Utils
import pandas as pd
import os, time

email = input("Enter linkedin email: ")
password = input("Enter linkedin password: ")
# connection note template; the variable name was lost in the source, so `message` is assumed here
message = "Hi {}, I want to connect with you."

# Login to browser
bsHelper = BsHelper("chromedriver.exe")
bsHelper.loginLinkedin(email, password)
bsParser = BsParser()
utils = Utils()
time.sleep(5)

# Change according to your requirements. For 1 page you get 10 results.
URL_TO_SEARCH = "https://www.linkedin.com/search/results/people/?keywords=senior%20data%20scientist%20job%20actively&origin=SWITCH_SEARCH_VERTICAL"
noOfPages = 3
DATA_DIR = "data"
CSV_DIR = "csv"
CSV_NAME = "accounts.csv"
# Here the number of pages is set to 3. For a hundred profiles, set the number of pages to 10.


def createDir(path):
    if not os.path.exists(path):
        os.mkdir(path)
def cppitertools(self):
    self.__git(self.__path('cppitertools'))
    i = self.__path('cppitertools')
    cmd = "cd {d} && git checkout d4f79321842dd584f799a7d51d3e066a2cdb7cac".format(
        d=shellquote(i.local_dir))
    Utils.run(cmd)
def train(self):
    criterion = nn.MSELoss()
    l2_loss = nn.MSELoss()
    l1_loss = nn.L1Loss()

    print('Training...')
    for epoch in range(self.num_epochs):
        for sample in self.data_loader:
            right_images = sample['face']
            onehot = sample['onehot']
            raw_wav = sample['audio']
            wrong_images = sample['wrong_face']
            id_labels = from_onehot_to_int(onehot)

            right_images = Variable(right_images.float()).cuda()
            raw_wav = Variable(raw_wav.float()).cuda()
            wrong_images = Variable(wrong_images.float()).cuda()
            onehot = Variable(onehot.float()).cuda()
            id_labels = Variable(id_labels).cuda()

            real_labels = torch.ones(right_images.size(0))
            fake_labels = torch.zeros(right_images.size(0))

            smoothed_real_labels = torch.FloatTensor(
                Utils.smooth_label(real_labels.numpy(), -0.1))  # so smoothed_real_labels will now be 0.9

            real_labels = Variable(real_labels).cuda()
            smoothed_real_labels = Variable(smoothed_real_labels).cuda()
            fake_labels = Variable(fake_labels).cuda()

            self.discriminator.zero_grad()
            fake_images, z_vector, _ = self.generator(raw_wav)
            outputs, _ = self.discriminator(fake_images, z_vector)
            fake_score = outputs
            fake_loss = criterion(outputs, fake_labels)

            outputs, activation_real = self.discriminator(right_images, z_vector)
            real_score = outputs
            real_loss = criterion(outputs, smoothed_real_labels)

            outputs, _ = self.discriminator(wrong_images, z_vector)
            wrong_loss = criterion(outputs, fake_labels)
            wrong_score = outputs

            d_loss = real_loss + fake_loss + wrong_loss
            d_loss.backward()
            self.optimD.step()

            self.generator.zero_grad()
            fake_images, z_vector, softmax_scores = self.generator(raw_wav)
            outputs, activation_fake = self.discriminator(fake_images, z_vector)
            _, activation_real = self.discriminator(right_images, z_vector)

            activation_fake = torch.mean(activation_fake, 0)
            activation_real = torch.mean(activation_real, 0)

            softmax_criterion = nn.CrossEntropyLoss()
            softmax_loss = softmax_criterion(softmax_scores, id_labels)

            g_loss = criterion(outputs, real_labels) \
                + self.l2_coef * l2_loss(activation_fake, activation_real.detach()) \
                + self.l1_coef * l1_loss(fake_images, right_images) \
                + self.softmax_coef * softmax_loss

            g_loss.backward()
            self.optimG.step()

        self.logger.log_iteration_gan(epoch, d_loss, g_loss, real_score,
                                      fake_score, wrong_score)

        if (epoch) % 10 == 0:
            Utils.save_checkpoint(self.discriminator, self.generator,
                                  self.checkpoints_path, self.save_path, epoch)
def getwans():
    sp = subprocess.run(["ip", "route", "show", "default"], stdout=subprocess.PIPE)
    wans = ""
    for line in sp.stdout.splitlines():
        l = line.decode().split()
        # wans += (l[4] + "," + l[2] + ";")
        sp2 = subprocess.run(["ip", "address", "show", l[4]], stdout=subprocess.PIPE)
        ip = None
        for ll in sp2.stdout.splitlines():
            nl = ll.decode()
            if "inet " in nl:
                ip = nl.split()[1].split("/")[0]
                break
        if ip != None:
            wans += (l[4] + "," + ip + ";")
    return wans


if __name__ == "__main__":
    with open('config.json') as json_file:
        config = json.load(json_file)
    config["CMD"] = "query"
    config["wans"] = getwans()
    print(Utils.getInstance().http_post("/north/", config))
    print(config)
def test_format_expected_date(self):
    expected_date = Utils.format_expected_date("Friday,10:30AM", "%Y-%m-%d %I:%M%p")
    self.assertTrue("10:30AM" in expected_date)
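The test only checks that the formatted result still contains the time component, so Utils.format_expected_date presumably resolves a weekday-plus-time string like "Friday,10:30AM" to a concrete upcoming date rendered with the given format string. A minimal sketch under that assumption (the real helper may anchor the date differently):

from datetime import datetime, timedelta


def format_expected_date(day_and_time, output_format):
    # Sketch only: resolve "Friday,10:30AM" to the next occurrence of that
    # weekday/time and render it with the caller's format string.
    day_name, time_part = day_and_time.split(",")
    target_weekday = ["Monday", "Tuesday", "Wednesday", "Thursday",
                      "Friday", "Saturday", "Sunday"].index(day_name)
    target_time = datetime.strptime(time_part, "%I:%M%p").time()
    today = datetime.now()
    days_ahead = (target_weekday - today.weekday()) % 7
    expected = datetime.combine((today + timedelta(days=days_ahead)).date(), target_time)
    return expected.strftime(output_format)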