def writePredictionsByCellLine(self, drug_scores_by_cell_line, input_folder):
    """Write one CSV row per cell line listing its drugs from highest to lowest score.

    The header is "Cell Line" followed by a "<rank><suffix> best drug" /
    "<rank><suffix> best drug score" column pair for every rank up to the
    longest drug list found in the input.

    Args:
        drug_scores_by_cell_line: Mapping of cell line name to a list of
            (drug, score) pairs.
        input_folder: Folder in which
            RecommendationsService.PREDICTIONS_BY_CELL_LINE_FILE is written.
    """
    # numpy.max raises ValueError on an empty sequence, and this computation
    # sits outside the try block below, so guard the empty mapping explicitly.
    total_drugs = 0
    if drug_scores_by_cell_line:
        total_drugs = numpy.max([len(drugs) for drugs in drug_scores_by_cell_line.values()])

    header = ["Cell Line"]
    best_drug = " best drug"
    best_drug_score = " best drug score"
    for i in range(1, total_drugs + 1):
        ordinal = SafeCastUtil.safeCast(i, str) + MachineLearningService.generateNumericalSuffix(i)
        header.append(ordinal + best_drug)
        header.append(ordinal + best_drug_score)

    predictions_by_cell_line_path = input_folder + "/" + RecommendationsService.PREDICTIONS_BY_CELL_LINE_FILE
    with open(predictions_by_cell_line_path, "w", newline='') as predictions_by_cell_line_file:
        try:
            writer = csv.writer(predictions_by_cell_line_file)
            writer.writerow(header)
            for cell_line, drugs_and_scores in drug_scores_by_cell_line.items():
                row = [cell_line]
                # Highest score first; sorted() is stable, so ties keep input order.
                for drug_and_score in sorted(drugs_and_scores, reverse=True, key=lambda x: x[1]):
                    row.append(drug_and_score[0])
                    row.append(drug_and_score[1])
                writer.writerow(row)
        except ValueError as error:
            # Log the path rather than the file object's repr.
            self.log.error("Error writing to %s. %s", predictions_by_cell_line_path, error)
def fetchFeatureImportances(self, model, features_in_order):
    """Collect score-weighted coefficients per feature and normalize them.

    Features whose name contains self.bin_cat_matrix_name are left out. Only
    sub-models exposing a coef_ whose length equals the number of remaining
    features contribute. Each feature's weighted coefficients are summed and
    divided by the number of evaluated features before being handed to the
    parent class's normalizeCoefficients.

    Args:
        model: Object exposing models_by_phrase, an iterable of items with
            .model and .score attributes.
        features_in_order: Feature names, positionally aligned with coef_.

    Returns:
        Whatever super().normalizeCoefficients returns for the computed
        coefficients and feature names.
    """
    scored_features = [name for name in features_in_order if self.bin_cat_matrix_name not in name]
    num_scored = len(scored_features)

    weighted_by_feature = OrderedDict()
    for phrase in model.models_by_phrase:
        if not hasattr(phrase.model, "coef_") or num_scored != len(phrase.model.coef_):
            continue
        for position, feature_name in enumerate(scored_features):
            weighted = phrase.model.coef_[position] * phrase.score
            if weighted_by_feature.get(feature_name) is None:
                weighted_by_feature[feature_name] = [weighted]
            else:
                weighted_by_feature[feature_name].append(weighted)

    names = SafeCastUtil.safeCast(weighted_by_feature.keys(), list)
    coefficients = []
    for weights in SafeCastUtil.safeCast(weighted_by_feature.values(), list):
        coefficients.append(numpy.sum(weights) / len(scored_features))
    return super().normalizeCoefficients(coefficients, names)
def toDict(dict_as_string):
    """Parse a "key: value, key: value" string into an OrderedDict.

    Keys and values are whitespace-stripped. A value is stored as a float
    when it casts cleanly and as the stripped string otherwise. Blank
    segments (including a completely empty input string) are skipped, so an
    empty string yields an empty dictionary instead of raising IndexError.

    Args:
        dict_as_string: Comma-separated "key: value" pairs.

    Returns:
        OrderedDict of parsed entries in input order.
    """
    dictionary = OrderedDict()
    for key_val_pair in dict_as_string.split(","):
        if not key_val_pair.strip():
            # "".split(",") yields [""], which has no value part to index.
            continue
        as_tuple = SafeCastUtil.safeCast(key_val_pair.split(":"), tuple)
        raw_value = as_tuple[1].strip()
        dictionary[as_tuple[0].strip()] = SafeCastUtil.safeCast(raw_value, float, raw_value)
    return dictionary
def generateGeneLists(self, features_per_file, important_features):
    """Write randomly sized gene-list CSVs plus one significant-gene-list CSV.

    important_features is consumed front to back in chunks of random size
    (at least two entries); each chunk becomes one single-row CSV named
    ArgumentProcessingService.GENE_LISTS + <n> + ".csv" under self.path. A
    final lone leftover feature is not written. Afterwards a single row of
    significant feature names (1 .. features_per_file / 10) is written to
    self.SIGNIFICANT_GENE_LIST + ".csv".

    Files are opened with newline='' as the csv module requires, so the
    writer's own "\r\n" terminator is not translated a second time on
    platforms whose line separator is not "\n".

    Args:
        features_per_file: Number of features in each generated feature file.
        important_features: Sequence of feature identifiers to distribute.
    """
    gene_list_num = 1
    while len(important_features) > 1:
        gene_list_size = random.randint(2, len(important_features))
        gene_list = [
            self.FEATURE_PREFIX + SafeCastUtil.safeCast(feature, str)
            for feature in important_features[:gene_list_size]
        ]
        # Trailing empty cell, kept from the original output format.
        gene_list.append("")
        important_features = important_features[gene_list_size:]

        file_name = self.path + "/" + ArgumentProcessingService.GENE_LISTS + \
            SafeCastUtil.safeCast(gene_list_num, str) + ".csv"
        with open(file_name, "w", newline='') as gene_list_file:
            writer = csv.writer(gene_list_file, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
            writer.writerow(gene_list)
        gene_list_num += 1

    significant_path = self.path + "/" + self.SIGNIFICANT_GENE_LIST + ".csv"
    with open(significant_path, "w", newline='') as significant_file:
        writer = csv.writer(significant_file, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
        num_significant = SafeCastUtil.safeCast(features_per_file / 10, int)
        significant_features = [
            RandomizedDataGenerator.SIGNIFICANT_FEATURE_PREFIX + SafeCastUtil.safeCast(significant_feature, str)
            for significant_feature in range(1, num_significant + 1)
        ]
        writer.writerow(significant_features)
def generateRandomizedFiles(self, num_feature_files, num_cells, num_features, is_classifier,
                            monte_carlo_permutations, data_split, individual_algorithm=None,
                            individual_hyperparams=None, analyze_all=False, use_static_features=False):
    """Generate a full randomized input set: results, feature files, gene lists and arguments.

    The features are split evenly across num_feature_files, and a random
    third of the per-file feature indices is drawn as the "important" set
    passed to the feature-file and gene-list generators. When
    use_static_features is set, the last generated feature file name is
    passed to generateArgsTxt as the static feature file.
    """
    per_file = SafeCastUtil.safeCast(num_features / num_feature_files, int)
    results = self.generateResultsCSV(is_classifier, num_cells)

    candidate_indices = range(1, per_file + 1)
    num_important = SafeCastUtil.safeCast((per_file / 3), int)
    important_features = random.sample(candidate_indices, num_important)

    file_names = self.generateFeaturesCSVs(num_feature_files, num_cells, per_file, results,
                                           important_features, use_static_features)
    self.generateGeneLists(per_file, important_features)

    static_features = ""
    if use_static_features:
        static_features = static_features + file_names[-1]

    self.generateArgsTxt(is_classifier, monte_carlo_permutations, data_split, individual_algorithm,
                         file_names[0], individual_hyperparams, analyze_all, static_features)
def generateNewReportFile(self, stats_overview_object):
    """Return the report template's lines with its placeholder lines filled in.

    Reads Reports/reportTemplate.html from the directory containing this
    source file. A line containing one of the three "//INSERT ... HERE"
    markers is replaced by the matching JavaScript assignment; every other
    line is copied through unchanged. A ValueError during processing is
    logged, and the lines gathered so far are returned.

    Args:
        stats_overview_object: Value stringified into $scope.allData.

    Returns:
        List of output lines.
    """
    this_file = os.path.realpath(__file__)
    template_path = os.path.abspath(os.path.join(this_file, os.pardir)) + "/Reports/reportTemplate.html"

    rendered_lines = []
    with open(template_path) as template_file:
        try:
            for line in template_file:
                if "//INSERT DEFAULT MIN SCORE HERE" in line:
                    min_score = SafeCastUtil.safeCast(AbstractModelTrainer.DEFAULT_MIN_SCORE, str)
                    line = "\t\t\t\tvar DEFAULT_MIN_SCORE = " + min_score + ";\n"
                elif "//INSERT CHART DATA HERE" in line:
                    chart_data = SafeCastUtil.safeCast(stats_overview_object, str)
                    line = "\t\t\t\t$scope.allData = " + chart_data + ";\n"
                elif "//INSERT IS CLASSIFIER HERE" in line:
                    classifier_flag = SafeCastUtil.safeCast(self.is_classifier, str).lower()
                    line = "\t\t\t\t$scope.isClassifier = " + classifier_flag + ";\n"
                rendered_lines.append(line)
        except ValueError as valueError:
            self.log.error(valueError)
    return rendered_lines
def logIfBestHyperparamsOnRangeThreshold(self, best_hyperparams, record_diagnostics, input_folder):
    """Log when a chosen hyperparameter sits at either end of its candidate list.

    For each key of self.hyperparameters, the chosen value is compared with
    the last and the first candidate value. A value >= the last candidate is
    reported as on the upper threshold; otherwise a value <= the first
    candidate is reported as on the lower threshold. Nothing is done when
    the trainer does not support hyperparameters or best_hyperparams is None.

    Args:
        best_hyperparams: Mapping of hyperparameter name to chosen value, or None.
        record_diagnostics: When truthy, also write the message through
            DiagnosticsFileWriter.
        input_folder: Folder handed to DiagnosticsFileWriter.writeToFile.
    """
    if not self.supportsHyperparams() or best_hyperparams is None:
        return

    for hyperparam_name in SafeCastUtil.safeCast(self.hyperparameters.keys(), list):
        hyperparam_set = self.hyperparameters[hyperparam_name]
        optimal_value = best_hyperparams.get(hyperparam_name)
        if optimal_value is None:
            # Logger.warn is a deprecated alias; warning() is the supported name.
            self.log.warning("Unable to determine optimal value given hyperparams: %s",
                             SafeCastUtil.safeCast(best_hyperparams, str, None))
            continue
        if len(hyperparam_set) == 0:
            # No candidate values means there is no range to compare against.
            continue

        if optimal_value >= hyperparam_set[-1]:
            threshold = "upper"
        elif optimal_value <= hyperparam_set[0]:
            threshold = "lower"
        else:
            continue

        message = "Best hyperparam for " + self.algorithm + " on " + threshold + \
                  " threshold of provided hyperparam set: " + hyperparam_name + " = " + \
                  SafeCastUtil.safeCast(optimal_value, str) + "\n"
        self.log.debug(message)
        if record_diagnostics:
            DiagnosticsFileWriter.writeToFile(input_folder, message, self.log)
def fetchAndCastHyperparams(self, config, trainer):
    """Pair the trainer's hyperparameter names with float-cast values from config.

    config.hyperparams is a comma-separated string; its n-th entry is
    assigned to the n-th key of trainer.hyperparameters. An IndexError is
    raised if the string holds fewer entries than there are keys.

    Returns:
        OrderedDict of hyperparameter name to float-cast value.
    """
    raw_values = config.hyperparams.split(",")
    casted = OrderedDict()
    names = SafeCastUtil.safeCast(trainer.hyperparameters.keys(), list)
    for position, name in enumerate(names):
        casted[name] = SafeCastUtil.safeCast(raw_values[position], float)
    return casted
def promptUserForInput():
    """Show the main menu once and run the task the user selects.

    "Q" returns immediately. 0 runs the cell line analysis, 1 converts
    MATLAB files to CSV, and 2 runs the drug recommendations analysis, each
    after prompting for the relevant folder. Any other input does nothing.
    """
    menu = ("-------Main Menu-------\n"
            "Choose your task:\n"
            "\t0: Analysis of cell lines\n"
            "\t1: Convert MATLAB to CSV file\n"
            "\t2: Dr.S Analysis (Drug Recommendations System)\n"
            "\tQ: Quit\n")
    choice = input(menu)
    choice_as_int = SafeCastUtil.safeCast(choice, int)
    choice_as_string = SafeCastUtil.safeCast(choice, str, "Q")

    if choice_as_string == "Q":
        return

    if choice_as_int == 0:
        runMainCellLineAnalysis(recursivelyPromptUser("Enter path of input folder:\n", str))
    elif choice_as_int == 1:
        FileConverter.convertMatLabToCSV(recursivelyPromptUser("Enter folder path of the matlab files:\n", str))
    elif choice_as_int == 2:
        fetchRecommendations(recursivelyPromptUser("Enter folder path of the input folder:\n", str))
def writeToPredictionsCsvInLock(self, cell_line, drug, input_folder, prediction, score,
                                write_lock=threading.Lock()):
    """Append one prediction row to the predictions CSV while holding a shared lock.

    The file is created with a header row when it does not yet exist in
    input_folder; otherwise the row is appended.

    Args:
        cell_line: Cell line name for the row.
        drug: Drug name for the row.
        input_folder: Folder containing (or to contain) self.PREDICTIONS_FILE.
        prediction: Predicted value; written as a string.
        score: Model score; written as a string.
        write_lock: Lock serializing writers. The default is deliberately
            created once, at definition time, so every call shares the same
            lock. A lock created inside the function body would be private
            to each call and would never block another thread.
    """
    self.log.debug("Locking current thread %s.", threading.current_thread())
    # The with-statement also guarantees release if opening or writing raises.
    with write_lock:
        write_action = "w"
        if self.PREDICTIONS_FILE in os.listdir(input_folder):
            write_action = "a"
        with open(input_folder + "/" + self.PREDICTIONS_FILE, write_action, newline='') as predictions_file:
            try:
                writer = csv.writer(predictions_file)
                if write_action == "w":
                    writer.writerow(["Drug", "Cell_Line", "Prediction", "R2^Score"])
                writer.writerow([
                    drug,
                    cell_line,
                    SafeCastUtil.safeCast(prediction, str),
                    SafeCastUtil.safeCast(score, str),
                ])
            except ValueError as error:
                self.log.error("Error writing to file %s. %s", self.PREDICTIONS_FILE, error)
        self.log.debug("Releasing current thread %s.", threading.current_thread())
def fetchAllHyperparamPermutations(self, hyperparams):
    """Enumerate hyperparameter combinations by walking a per-key index vector.

    Args:
        hyperparams: Mapping of hyperparameter name to an indexable
            collection of candidate values.

    Returns:
        List of OrderedDicts (name -> chosen value), each a deep copy, with
        no combination appearing twice. An empty mapping yields an empty
        list because the loop below never runs.
    """
    all_perms = []
    hyperparam_keys = SafeCastUtil.safeCast(hyperparams.keys(), list)
    # One index per key, all starting at 0. numpy.zeros yields floats, which
    # is why each index is cast to int before it is used below.
    zero_filled_indices = SafeCastUtil.safeCast(
        numpy.zeros(len(hyperparam_keys)), list)
    # Position in the index vector that is currently being advanced.
    target_index = len(zero_filled_indices) - 1
    current_perm = zero_filled_indices[:]
    while target_index >= 0:
        # Materialize the combination described by the current index vector.
        current_hyperparams = OrderedDict()
        for i in range(0, len(current_perm)):
            param_name = hyperparam_keys[i]
            current_hyperparams[param_name] = hyperparams[param_name][
                SafeCastUtil.safeCast(current_perm[i], int)]
        # The walk revisits index vectors after a reset, so skip combinations
        # that were already recorded.
        if current_hyperparams not in all_perms:
            clone_map = copy.deepcopy(current_hyperparams)
            all_perms.append(clone_map)
        if current_perm[target_index] < len(
                hyperparams[hyperparam_keys[target_index]]) - 1:
            current_perm[target_index] += 1
            # Move the target back towards the last position.
            # NOTE(review): the bound uses the candidate list of
            # hyperparam_keys[target_index], not of target_index + 1 —
            # confirm this is intended when the lists differ in length.
            while len(current_perm) > target_index + 1 and current_perm[target_index + 1] <\
                    len(hyperparams[hyperparam_keys[target_index]]):
                target_index += 1
        else:
            # This position is exhausted: step one position left and zero
            # every index to the right of the new target.
            target_index -= 1
            for subsequent_index in range(target_index, len(current_perm) - 1):
                current_perm[subsequent_index + 1] = 0
    return all_perms
def extractCastedFeatures(self, line):
    """Split a comma-separated line and cast each cell to float where possible.

    A cell that casts to float is stored as a float; any other cell is
    stored as its whitespace-stripped string.

    Returns:
        List with one entry per comma-separated cell, in order.
    """
    def cast_cell(raw_cell):
        if SafeCastUtil.safeCast(raw_cell, float) is None:
            return SafeCastUtil.safeCast(raw_cell.strip(), str)
        return SafeCastUtil.safeCast(raw_cell.strip(), float)

    return [cast_cell(raw_cell) for raw_cell in line.split(",")]
def toString(dictionary):
    """Render a dictionary as "key: value, key: value".

    Keys are used as-is (they must be strings) and values are cast to str.
    An empty dictionary yields an empty string.
    """
    ordered_keys = SafeCastUtil.safeCast(dictionary.keys(), list)
    return ", ".join(
        key + ": " + SafeCastUtil.safeCast(dictionary[key], str)
        for key in ordered_keys
    )
def formatFullFeatureMatrix(self, feature_names, transposed_dict):
    """Build a feature matrix dict from a per-cell-line mapping of values.

    Args:
        feature_names: Stored under the self.FEATURE_NAMES key.
        transposed_dict: Mapping of cell line -> mapping whose values are
            that cell line's raw feature values.

    Returns:
        Dict holding the feature names plus, per cell line, the list of
        values after each has been passed through self.formatValue.
    """
    formatted_matrix = {self.FEATURE_NAMES: feature_names}
    for cell_line in SafeCastUtil.safeCast(transposed_dict.keys(), list):
        raw_values = SafeCastUtil.safeCast(transposed_dict[cell_line].values(), list)
        formatted_row = []
        for raw_value in raw_values:
            formatted_row.append(self.formatValue(raw_value))
        formatted_matrix[cell_line] = SafeCastUtil.safeCast(formatted_row, list)
    return formatted_matrix
def furtherSplitTrainingMatrix(self, percent, matrix):
    """Return the first percent% of the matrix's entries, in insertion order.

    The previous implementation built the reduced matrix and then returned
    the untouched input, so the split had no effect. It also passed
    percent as the logging format string.

    Args:
        percent: Share of entries to keep, 0-100. The count is truncated
            towards zero.
        matrix: Mapping of cell line to its data.

    Returns:
        New dict holding the first int(len(matrix) * percent / 100) entries.
        The input mapping is not modified.
    """
    # NOTE(review): every key counts towards the total; if callers keep a
    # non-cell-line entry in this mapping, confirm that is intended.
    new_matrix_len = max(int(len(matrix) * (percent / 100)), 0)
    self.log.info("Keeping %s of %s training entries (%s percent).", new_matrix_len, len(matrix), percent)
    kept_cell_lines = list(matrix.keys())[:new_matrix_len]
    return {cell_line: matrix[cell_line] for cell_line in kept_cell_lines}
def logOptimalHyperParams(self, hyperparams, feature_set_as_string, record_diagnostics, input_folder):
    """Log the chosen hyperparameters, one "name = value" line each.

    Args:
        hyperparams: Mapping of hyperparameter name to chosen value.
        feature_set_as_string: Label of the feature set, used in the heading.
        record_diagnostics: When truthy, also write the message through
            DiagnosticsFileWriter.
        input_folder: Folder handed to DiagnosticsFileWriter.writeToFile.
    """
    pieces = ["Optimal Hyperparameters for " + feature_set_as_string + " " + self.algorithm +
              " algorithm chosen as:\n"]
    for name in SafeCastUtil.safeCast(hyperparams.keys(), list):
        pieces.append("\t" + name + " = " + SafeCastUtil.safeCast(hyperparams[name], str) + "\n")
    message = "".join(pieces)

    self.log.info(message)
    if record_diagnostics:
        DiagnosticsFileWriter.writeToFile(input_folder, message, self.log)
def writeRandomFeature(self, file_name):
    """Return a one-element list holding a random value suited to the file type.

    The type is chosen by which suffix constant occurs in file_name, checked
    in this order: binary categorical (a quoted '0' or '1'), categorical
    (a letter a-e), integer (0-99), and otherwise a random float. All but
    the binary categorical value are cast to str.
    """
    if self.BINARY_CATEGORICAL_SUFFIX in file_name:
        return [np.random.choice(["'0'", "'1'"])]

    if self.CATEGORICAL_SUFFIX in file_name:
        random_value = np.random.choice(["a", "b", "c", "d", "e"])
    elif self.INTEGER_SUFFIX in file_name:
        random_value = np.random.randint(0, 100)
    else:
        random_value = np.random.random_sample()
    return [SafeCastUtil.safeCast(random_value, str)]
def extractCastedFeatures(self, line, important_feature_indices):
    """Pick the cells at the given indices from a CSV line and cast them.

    Args:
        line: Comma-separated line of feature values.
        important_feature_indices: Column indices to extract; a None entry
            yields self.UNFILLED_VALUE_PLACEHOLDER instead of a cell value.

    Returns:
        List with one entry per index: a float when the cell casts to float,
        otherwise the stripped string.
    """
    cells = line.strip().split(",")
    extracted = []
    for index in important_feature_indices:
        if index is None:
            # TODO: Verify that this is acceptable, it works for one hot encoding and should never vary in any model
            extracted.append(self.UNFILLED_VALUE_PLACEHOLDER)
            continue
        raw_cell = cells[index]
        target_type = float if SafeCastUtil.safeCast(raw_cell, float) is not None else str
        extracted.append(SafeCastUtil.safeCast(raw_cell.strip(), target_type))
    return extracted
def writeDiagnostics(self, features_removed):
    """Write a report of gene-list features missing from each feature file.

    Args:
        features_removed: Iterable of entries indexed as
            [file name, {gene list name: [(gene, index), ...]}, feature count].
            The feature count is the denominator for the percentage of
            missing genes.
    """
    parts = []
    for feature_file in features_removed:
        parts.append("\nFeatures from gene list(s) not available in " + feature_file[0] + ":\n")
        for gene_list, missing_genes in feature_file[1].items():
            num_missing = len(missing_genes)
            percent_missing = round((num_missing / feature_file[2]) * 100, 2)
            parts.append("\t" + SafeCastUtil.safeCast(num_missing, str) + " (" +
                         SafeCastUtil.safeCast(percent_missing, str) + " %" +
                         ") features not present in " + gene_list + ".csv:\n")
            for gene in missing_genes:
                parts.append("\t\t" + gene[0] + " at index " + SafeCastUtil.safeCast(gene[1], str) + "\n")
    parts.append("\n\n######################\n\n")
    DiagnosticsFileWriter.writeToFile(self.input_folder, "".join(parts), self.log)
def promptUserForInput():
    """Show the main menu once and run the cell line analysis if selected.

    "Q" returns immediately; 0 prompts for an input folder and runs the
    analysis. Any other input does nothing.
    """
    menu = ("-------Main Menu-------\n"
            "Choose your task:\n"
            "\t0: Analysis of cell lines\n"
            "\tQ: Quit\n")
    choice = input(menu)
    choice_as_int = SafeCastUtil.safeCast(choice, int)
    choice_as_string = SafeCastUtil.safeCast(choice, str, "Q")

    if choice_as_string == "Q":
        return
    if choice_as_int == 0:
        runMainCellLineAnalysis(recursivelyPromptUser("Enter path of input folder:\n", str))
def convertMatLabToCSV(matlab_files_directory):
    """Convert every *.mat file in a directory into a folder of CSV files.

    The process's working directory is changed to matlab_files_directory.
    For each MATLAB file a "<drug>_analysis" directory is created (os.mkdir
    raises if it already exists) and filled with one CSV per key of
    FileConverter.VARIABLE_MATCHES plus a "<drug>_results.csv" file.

    Args:
        matlab_files_directory: Directory containing the .mat files.
    """
    def format_id_string(array):
        # Take the first element of the loaded entry and cast it to str.
        return SafeCastUtil.safeCast(array[0], str)

    log = LoggerFactory.createLog(__name__)
    os.chdir(matlab_files_directory)

    for input_file in glob.glob("*.mat"):
        drug_name = input_file.split("gexmutcnum.mat")[0].strip()
        new_directory = matlab_files_directory + "/" + drug_name + "_analysis"
        matlab_file = scipy.io.loadmat(input_file)
        os.mkdir(new_directory)

        for key in SafeCastUtil.safeCast(FileConverter.VARIABLE_MATCHES.keys(), list):
            header = []
            for feature_name in matlab_file.get(key)[0]:
                header.append(format_id_string(feature_name))
            file_name = new_directory + "/" + drug_name + "_" + FileConverter.FILE_NAMES[key] + ".csv"
            matched_variable = matlab_file.get(FileConverter.VARIABLE_MATCHES.get(key))
            cell_line_data = FileConverter.formatCellLineData(matched_variable, key)
            FileConverter.validateAndWriteCSV(cell_line_data, header, file_name, log,
                                              FileConverter.EXPECTED_TYPES[key])

        cell_line_ids = []
        for cell_id in matlab_file.get(FileConverter.ID_FIELD):
            cell_line_ids.append(format_id_string(cell_id))
        results = matlab_file.get(FileConverter.RESULTS_FIELD)
        zipped_results = SafeCastUtil.safeCast(zip(cell_line_ids, results[0]), list)
        results_file = new_directory + "/" + drug_name + "_results.csv"
        FileConverter.validateAndWriteCSV(zipped_results, ["cell_line", "result"], results_file, log, float)

        log.info("The MATLAB file for %s has been successfully converted into csv files ready to be used"
                 " with the CLA software!", drug_name)
    log.info("All MATLAB files have been processed!")
def oneHotEncodeCategoricalVariables(self, genomes_matrix, categorical_variables):
    """Replace categorical columns with one 0/1 column per observed value.

    Column indices are de-duplicated and processed from highest to lowest,
    so expanding one column does not shift the position of columns still to
    be processed. Only indices strictly between 0 and the width of the
    first row are encoded. Encoding is abandoned, and the original matrix
    returned, when an index or a value in a categorical column is not
    castable to int.

    Args:
        genomes_matrix: Iterable of rows (each an iterable of values).
        categorical_variables: Column indices to encode; None or empty
            returns genomes_matrix unchanged.

    Returns:
        genomes_matrix itself when nothing is encoded or encoding is
        aborted; otherwise a new list of rows, in which encoded rows are
        numpy arrays produced by numpy.concatenate.
    """
    if categorical_variables is None or len(categorical_variables) == 0:
        return genomes_matrix

    encoded_matrix = [list(genome) for genome in genomes_matrix]  # List of lists
    if not encoded_matrix:
        # No rows: nothing to encode, and encoded_matrix[0] below would raise.
        return encoded_matrix

    sorted_deduped_variables = numpy.sort(numpy.unique(categorical_variables))[::-1]
    for variable_raw in sorted_deduped_variables:
        categorical_variable = SafeCastUtil.safeCast(variable_raw, int)
        if categorical_variable is None:
            self.log.warning("Aborting one-hot-encoding. Non-integer categorical variable index detected.")
            # Actually abort, as the message says; falling through would
            # compare None with integers and raise TypeError.
            return genomes_matrix

        # NOTE(review): the strict "> 0" means column 0 is never encoded —
        # confirm that is intended.
        if not len(encoded_matrix[0]) > categorical_variable > 0:
            continue

        assigned_values = []
        for genome in encoded_matrix:
            value = genome[categorical_variable]
            if SafeCastUtil.safeCast(value, int) is None:
                self.log.warning("Aborting one-hot-encoding. Non integer value for categorical variable "
                                 "detected.")
                return genomes_matrix
            if value not in assigned_values:
                assigned_values.append(value)
        assigned_values = numpy.sort(assigned_values)

        for matrix_row, row in enumerate(encoded_matrix):
            if categorical_variable >= len(row):
                continue
            value_as_multiple_categories = [
                1 if assigned_value == row[categorical_variable] else 0
                for assigned_value in assigned_values
            ]
            encoded_matrix[matrix_row] = numpy.concatenate(
                (row[:categorical_variable], value_as_multiple_categories, row[categorical_variable + 1:]))
    return encoded_matrix
def assertResultsForIndividualCombo(self, target_dir, algorithm, expected_lines, is_classifier):
    """Check the structure of "<algorithm>.csv" inside target_dir.

    The first line must equal MachineLearningService.getCSVFileHeader(...).
    Every later line needs a ":" in its first column and a score above
    AbstractModelTrainer.DEFAULT_MIN_SCORE in its second. Assertion
    failures inside the read loop are logged rather than raised; reading
    stops there, so the final line-count assertion is what reports them.
    """
    file_name = algorithm + ".csv"
    assert file_name in os.listdir(target_dir)

    num_lines = 0
    with open(target_dir + "/" + file_name) as csv_file:
        try:
            for num_lines, line in enumerate(csv_file, start=1):
                columns = line.strip().split(",")
                if num_lines == 1:
                    assert columns == MachineLearningService.getCSVFileHeader(is_classifier, algorithm, 1)
                    continue
                assert ":" in columns[0]
                score = SafeCastUtil.safeCast(columns[1], float)
                assert score > AbstractModelTrainer.DEFAULT_MIN_SCORE
                if len(columns) > 3:
                    assert columns[3] is not None
        except AssertionError as error:
            self.log.error(error)
        finally:
            self.log.debug("Closing file %s", file_name)
    assert num_lines == expected_lines
def createAndValidateFeatureMatrix(self, results_list, gene_lists, write_diagnostics, feature_files,
                                   static_feature_files):
    """Validate the gene lists against the feature files and build the feature matrix.

    Non-static feature files are first validated against the gene lists
    (optionally writing diagnostics), then extracted into the matrix via
    extractFeatureMatrix. Static feature files are loaded in full and
    merged in: values are appended to keys that already exist, and new
    keys are added as-is.

    Returns:
        Dict containing self.FEATURE_NAMES plus the per-key value lists.
    """
    incomplete_features = []
    for file in feature_files:
        if file in static_feature_files:
            continue
        features_path = self.input_folder + "/" + file
        validated_features, num_features = self.validateGeneLists(features_path, file, gene_lists)
        incomplete_features.append([file, validated_features, num_features])

    if write_diagnostics:
        self.writeDiagnostics(incomplete_features)

    feature_matrix = {self.FEATURE_NAMES: []}
    for file in feature_files:
        features_path = self.input_folder + "/" + file
        if file not in static_feature_files:
            self.extractFeatureMatrix(feature_matrix, features_path, file, gene_lists, results_list)
            continue

        cell_line_names = [result[0] for result in results_list]
        data_frame = self.fetchFullDataframe(cell_line_names, file)
        feature_names = SafeCastUtil.safeCast(data_frame.columns, list)
        formatted_matrix = self.formatFullFeatureMatrix(feature_names, data_frame.T.to_dict())
        for key, values in formatted_matrix.items():
            if key in feature_matrix:
                feature_matrix[key].extend(values)
            else:
                feature_matrix[key] = values
    return feature_matrix
def validateAndExtractResults(self, results_file, is_classifier):
    """Read the results CSV into a list of [cell_line, result] pairs.

    The first line (header) and whitespace-only lines are skipped. Results
    are cast to int for classifiers and to float otherwise. A line without
    exactly two columns, or a repeated cell line name, raises ValueError.
    That error is caught and logged here, so parsing stops and the pairs
    read so far are returned.

    The duplicate check previously tested the name against a list of
    [name, result] pairs and therefore never matched; names are now tracked
    in a set.

    Args:
        results_file: File name relative to self.input_folder.
        is_classifier: Selects the int (True) or float (False) cast.

    Returns:
        List of [cell_line, result] lists in file order.
    """
    sample_list = []
    seen_cell_lines = set()
    cast_type = int if is_classifier else float
    results_path = self.input_folder + "/" + results_file
    with open(results_path) as data_file:
        try:
            for line_index, line in enumerate(data_file):
                if line_index == 0 or not line.strip():  # header or whitespace
                    continue
                line_trimmed_split = line.strip().split(",")
                if len(line_trimmed_split) != 2:
                    self.log.error("Each line in %s must be 2 columns. Aborting argument processing.",
                                   results_file)
                    raise ValueError("Each line in results file must be 2 columns.")
                cell_line = line_trimmed_split[0]
                cell_result = SafeCastUtil.safeCast(line_trimmed_split[1], cast_type)
                if cell_line in seen_cell_lines:
                    self.log.error("Repeated cell line name: %s. Aborting argument processing.", cell_line)
                    raise ValueError("Repeated cell line name.")
                seen_cell_lines.add(cell_line)
                sample_list.append([cell_line, cell_result])
        except ValueError as value_error:
            self.log.error(value_error)
        finally:
            self.log.debug("Closing file %s", results_file)
    return sample_list
def fetchDrugScoresByCellLine(self, input_folder):
    """Group the predictions file's rows by cell line.

    Reads RecommendationsService.PREDICTIONS_FILE from input_folder, skips
    the header, and takes the drug, cell line and score from the first
    three columns. Lines with fewer than three columns, an empty drug or
    cell line, or a score that does not cast to float are logged and
    skipped. A score of exactly 0.0 is valid; it was previously discarded
    by a truthiness check.

    Returns:
        Dict of cell line -> list of (drug, score) tuples in file order.
    """
    predictions_file = input_folder + "/" + RecommendationsService.PREDICTIONS_FILE
    drug_scores_by_cell_line = {}
    with open(predictions_file) as input_file:
        try:
            for line_index, line in enumerate(input_file):
                if line_index == 0:
                    continue
                line_split = line.split(",")
                if len(line_split) < 3:
                    # Too few columns to hold drug, cell line and score.
                    self.log.warning("Invalid line detected for %s at line %s.", predictions_file,
                                     line_index + 1)
                    continue
                drug = line_split[0]
                cell_line = line_split[1]
                score = SafeCastUtil.safeCast(line_split[2], float)
                if not drug or not cell_line or score is None:
                    self.log.warning("Invalid line detected for %s at line %s.", predictions_file,
                                     line_index + 1)
                    continue
                drug_scores_by_cell_line.setdefault(cell_line, []).append((drug, score))
        except ValueError as error:
            self.log.error("Error parsing predictions file %s. %s", predictions_file, error)
    return drug_scores_by_cell_line
def fetchBestHyperparams(self, row, indices_of_outer_loops):
    """Pick the hyperparameter set that occurs most often in the Monte Carlo results.

    Ties on the number of occurrences are broken by the highest average
    score. The single-winner case previously returned whichever key was
    iterated last instead of the winner.

    Args:
        row: Passed through to self.getMonteCarloResults.
        indices_of_outer_loops: Passed through to self.getMonteCarloResults.

    Returns:
        The winning key of the Monte Carlo results mapping, or None when
        the mapping is empty or no tied candidate averages above 0.
    """
    monte_carlo_results = self.getMonteCarloResults(row, indices_of_outer_loops)

    max_num_occurrences = 0
    best_hyps_list = []
    for hyperparam in list(monte_carlo_results.keys()):
        num_occurrences = len(monte_carlo_results.get(hyperparam))
        if num_occurrences > max_num_occurrences:
            max_num_occurrences = num_occurrences
            best_hyps_list = [hyperparam]
        elif num_occurrences == max_num_occurrences:
            best_hyps_list.append(hyperparam)

    if len(best_hyps_list) == 1:
        return best_hyps_list[0]

    # NOTE(review): the tie-break keeps the original floor of 0, so tied
    # candidates whose averages are all <= 0 yield None — confirm intent.
    best_hyps = None
    top_score = 0
    for hyperparam in best_hyps_list:
        average_score = numpy.average(monte_carlo_results.get(hyperparam))
        if average_score > top_score:
            top_score = average_score
            best_hyps = hyperparam
    return best_hyps
def fetchOrReturnDefault(self, field, to_type, default):
    """Cast a raw configuration field, falling back to a default when it is empty.

    Args:
        field: Raw value; any falsy value (None, "") selects the default.
        to_type: Target type for the cast.
        default: Returned when field is falsy.

    Returns:
        default for a falsy field; False when a bool is requested and the
        field spells "false" in any letter case (a plain bool() cast of a
        non-empty string would be True); otherwise the result of the cast.
    """
    if not field:
        return default
    if field.lower() == 'false' and to_type is bool:
        return False
    return SafeCastUtil.safeCast(field, to_type)
def writeToCSVInLock(self, line, input_folder, ml_algorithm, num_combos, outer_perms,
                     csv_lock=threading.Lock()):
    """Append one result row to "<ml_algorithm>.csv" and log progress, under a shared lock.

    The file is created with a header row when it does not yet exist in
    input_folder. After writing, the file is read back to count its data
    rows, and that count is passed to self.logPercentDone.

    Args:
        line: Row to write.
        input_folder: Folder containing (or to contain) the CSV file.
        ml_algorithm: Algorithm name; also the CSV file's base name.
        num_combos: Total number of combinations, for the progress log.
        outer_perms: Passed to self.getCSVFileHeader when writing the header.
        csv_lock: Lock serializing writers. The default is deliberately
            created once, at definition time, so every call shares the same
            lock. A lock created inside the function body would be private
            to each call and would never block another thread.
    """
    # The with-statement also guarantees release if any step below raises.
    with csv_lock:
        self.lockThreadMessage()

        file_name = ml_algorithm + ".csv"
        file_path = input_folder + "/" + file_name
        write_action = "w"
        if file_name in os.listdir(input_folder):
            write_action = "a"

        with open(file_path, write_action, newline='') as csv_file:
            try:
                writer = csv.writer(csv_file)
                if write_action == "w":
                    writer.writerow(self.getCSVFileHeader(self.inputs.is_classifier, ml_algorithm, outer_perms))
                writer.writerow(line)
            except ValueError as error:
                self.log.error("Error writing to file %s. %s", file_name, error)

        total_lines = 0
        # newline='' lets the csv reader handle line endings itself.
        with open(file_path, newline='') as csv_file:
            try:
                reader = csv.reader(csv_file)
                # Subtract one so the header row is not counted.
                total_lines += (len(SafeCastUtil.safeCast(reader, list)) - 1)
            except ValueError as error:
                self.log.error("Error reading lines from file %s. %s", file_name, error)

        self.logPercentDone(total_lines, num_combos, ml_algorithm)
        self.unlockThreadMessage()
def assertRecsByCellLine(self, num_cell_lines, drug_names, target_dir):
    """Check the structure of the predictions-by-cell-line CSV in target_dir.

    The header must start with "Cell Line". In every other row the first
    column must contain "cell_line", odd columns must be known drug names,
    and even columns must be scores above
    AbstractModelTrainer.DEFAULT_MIN_SCORE. Assertion failures inside the
    read loop are logged rather than raised; reading stops there, so the
    final line-count assertion is what reports them.
    """
    file_name = target_dir + "/" + RecommendationsService.PREDICTIONS_BY_CELL_LINE_FILE
    num_lines = 0
    with open(file_name) as csv_file:
        try:
            for num_lines, line in enumerate(csv_file, start=1):
                columns = line.split(",")
                if num_lines == 1:
                    assert columns[0] == "Cell Line"
                    continue
                for position, cell in enumerate(columns):
                    if position == 0:
                        assert "cell_line" in cell
                    elif position % 2 == 0:
                        assert SafeCastUtil.safeCast(cell, float) > AbstractModelTrainer.DEFAULT_MIN_SCORE
                    elif position % 2 == 1:
                        assert cell in drug_names
        except AssertionError as error:
            self.log.error(error)
        finally:
            self.log.debug("Closing file %s", file_name)
    assert num_lines == num_cell_lines + 1