def load_tsv_to_net(net, file_buffer):
    """Parse the tab-separated matrix in *file_buffer* and load it into *net*.

    Args:
        net: Object exposing ``df_to_dat``; it receives a dict whose ``'mat'``
            entry is the parsed DataFrame.
        file_buffer: In-memory buffer (must provide ``getvalue`` and ``seek``)
            holding the tab-separated text.
    """
    import pandas as pd
    import categories

    lines = file_buffer.getvalue().split('\n')

    # check_categories returns a mapping with 'row' and 'col' counts;
    # presumably the number of label columns / header rows -- confirm against
    # the categories module.
    num_labels = categories.check_categories(lines)

    # Materialise as real lists: on Python 3 ``range`` is a lazy object, not
    # the list of positions that pandas expects for index_col / header.
    row_arr = list(range(num_labels['row']))
    col_arr = list(range(num_labels['col']))

    # getvalue() returns the whole buffer regardless of the cursor, whereas
    # read_table reads from the cursor; rewind so both see the same text.
    file_buffer.seek(0)

    tmp_df = {}

    # use a multi-row header only if there are col categories
    if len(col_arr) > 1:
        tmp_df['mat'] = pd.read_table(file_buffer, index_col=row_arr,
                                      header=col_arr)
    else:
        tmp_df['mat'] = pd.read_table(file_buffer, index_col=row_arr)

    # Remove columns that are entirely NaN (these occur when rows have
    # trailing tabs). how='all' is required: the default how='any' would also
    # discard real data columns that merely contain a missing value.
    tmp_df['mat'] = tmp_df['mat'].dropna(axis=1, how='all')

    net.df_to_dat(tmp_df)
def load_tsv_to_net(net, file_buffer, filename=None):
    """Parse the tab-separated matrix in *file_buffer* and load it into *net*.

    Args:
        net: Object exposing ``df_to_dat`` and a ``dat`` mapping; it receives
            the processed DataFrame dict and has ``dat['filename']`` set.
        file_buffer: In-memory buffer (must provide ``getvalue`` and ``seek``)
            holding the tab-separated text.
        filename: Optional name recorded in ``net.dat['filename']``.
    """
    import pandas as pd
    import categories
    import proc_df_labels

    lines = file_buffer.getvalue().split('\n')

    # check_categories returns a mapping with 'row' and 'col' counts;
    # presumably the number of label columns / header rows -- confirm against
    # the categories module.
    num_labels = categories.check_categories(lines)

    # Materialise as real lists: on Python 3 ``range`` is a lazy object, not
    # the list of positions that pandas expects for index_col / header.
    row_arr = list(range(num_labels['row']))
    col_arr = list(range(num_labels['col']))

    # getvalue() returns the whole buffer regardless of the cursor, whereas
    # read_table reads from the cursor; rewind so both see the same text.
    file_buffer.seek(0)

    tmp_df = {}

    # use a multi-row header only if there are col categories
    if len(col_arr) > 1:
        tmp_df['mat'] = pd.read_table(file_buffer, index_col=row_arr,
                                      header=col_arr)
    else:
        tmp_df['mat'] = pd.read_table(file_buffer, index_col=row_arr)

    # Label post-processing is delegated to proc_df_labels.main, which takes
    # and returns the {'mat': DataFrame} dict (its exact behaviour is defined
    # in that module).
    tmp_df = proc_df_labels.main(tmp_df)

    net.df_to_dat(tmp_df)
    net.dat['filename'] = filename
def load_tsv_to_net(net, file_buffer, filename=None):
    """Parse the tab-separated matrix in *file_buffer* and load it into *net*.

    After parsing, row and column labels are normalised per axis, based on
    the first label of that axis:

    * numeric labels are converted to strings;
    * string labels shaped like a tuple literal, e.g. ``"('a', 'b')"``, are
      parsed into real tuples.

    Args:
        net: Object exposing ``df_to_dat`` and a ``dat`` mapping; it receives
            the DataFrame dict and has ``dat['filename']`` set.
        file_buffer: In-memory buffer (must provide ``getvalue`` and ``seek``)
            holding the tab-separated text.
        filename: Optional name recorded in ``net.dat['filename']``.

    Raises:
        ValueError, SyntaxError: From ``ast.literal_eval`` if the first label
            of an axis looks like a tuple but another label on that axis is
            not a valid Python literal.
    """
    import pandas as pd
    import categories

    lines = file_buffer.getvalue().split('\n')

    # check_categories returns a mapping with 'row' and 'col' counts;
    # presumably the number of label columns / header rows -- confirm against
    # the categories module.
    num_labels = categories.check_categories(lines)

    # Materialise as real lists: on Python 3 ``range`` is a lazy object, not
    # the list of positions that pandas expects for index_col / header.
    row_arr = list(range(num_labels['row']))
    col_arr = list(range(num_labels['col']))

    # getvalue() returns the whole buffer regardless of the cursor, whereas
    # read_table reads from the cursor; rewind so both see the same text.
    file_buffer.seek(0)

    tmp_df = {}

    # use a multi-row header only if there are col categories
    if len(col_arr) > 1:
        tmp_df['mat'] = pd.read_table(file_buffer, index_col=row_arr,
                                      header=col_arr)
    else:
        tmp_df['mat'] = pd.read_table(file_buffer, index_col=row_arr)

    mat = tmp_df['mat']

    # Only reassign an axis when a conversion actually happened, so existing
    # index objects are left untouched otherwise.
    new_rows = _normalize_axis_labels(mat.index.tolist())
    if new_rows is not None:
        mat.index = new_rows

    new_cols = _normalize_axis_labels(mat.columns.tolist())
    if new_cols is not None:
        mat.columns = new_cols

    # NOTE: all-NaN columns (e.g. produced by trailing tabs) are not removed
    # in this version of the loader.
    net.df_to_dat(tmp_df)
    net.dat['filename'] = filename


def _label_kind(label):
    """Classify one axis label as ``'number'``, ``'tuple_text'`` or ``'other'``."""
    import numbers

    if isinstance(label, tuple):
        return 'other'

    # isinstance + numbers.Number also covers numpy scalar types such as
    # np.float64 / np.int32, which exact ``type(x) is int`` checks miss.
    if isinstance(label, numbers.Number):
        return 'number'

    try:
        # startswith/endswith are safe on empty strings, unlike label[0].
        looks_like_tuple = (label.startswith('(') and label.endswith(')')
                            and label.find(',') > 0)
    except (AttributeError, TypeError):
        # Not a text label; leave it alone.
        return 'other'

    return 'tuple_text' if looks_like_tuple else 'other'


def _normalize_axis_labels(labels):
    """Return converted labels for one axis, or ``None`` if nothing changes.

    The decision is made from the first label only and then applied to the
    whole axis. An empty axis needs no conversion.
    """
    from ast import literal_eval

    if not labels:
        return None

    kind = _label_kind(labels[0])
    if kind == 'tuple_text':
        return [literal_eval(x) for x in labels]
    if kind == 'number':
        return [str(x) for x in labels]
    return None
def load_tsv_to_net(net, file_buffer, filename=None):
    """Read a tab-separated matrix from *file_buffer* and hand it to *net*.

    Once parsed, each axis (rows, columns) has its labels normalised. The
    first label of the axis decides what happens to the whole axis:

    * a numeric first label -> every label is converted with ``str``;
    * a string first label of the form ``"(…,…)"`` -> every label is parsed
      into a tuple with ``ast.literal_eval``;
    * anything else (including real tuples or an empty axis) -> unchanged.

    Args:
        net: Object exposing ``df_to_dat`` and a ``dat`` mapping; it receives
            the DataFrame dict and has ``dat['filename']`` set.
        file_buffer: In-memory buffer (must provide ``getvalue`` and ``seek``)
            holding the tab-separated text.
        filename: Optional name recorded in ``net.dat['filename']``.

    Raises:
        ValueError, SyntaxError: From ``ast.literal_eval`` when an axis is
            treated as tuple text but one of its labels is not a valid
            Python literal.
    """
    import numbers
    from ast import literal_eval as make_tuple

    import pandas as pd
    import categories

    def convert_labels(labels):
        """Return the converted label list, or None when no change is needed."""
        if not labels:
            # Empty axis: nothing to inspect (labels[0] would raise).
            return None

        first = labels[0]
        if isinstance(first, tuple):
            return None

        # numbers.Number matches Python ints/floats and numpy scalars alike;
        # exact type comparisons missed e.g. np.float64 and then crashed when
        # the value was indexed like a string.
        if isinstance(first, numbers.Number):
            return [str(label) for label in labels]

        try:
            # startswith/endswith tolerate the empty string, unlike first[0].
            is_tuple_text = (first.startswith('(') and first.endswith(')')
                             and first.find(',') > 0)
        except (AttributeError, TypeError):
            # Not a text label; leave the axis as it is.
            return None

        if is_tuple_text:
            return [make_tuple(label) for label in labels]
        return None

    lines = file_buffer.getvalue().split('\n')

    # check_categories returns a mapping with 'row' and 'col' counts;
    # presumably the number of label columns / header rows -- confirm against
    # the categories module.
    num_labels = categories.check_categories(lines)

    # Real lists, not lazy Python 3 ``range`` objects, are what pandas
    # expects for index_col / header.
    row_arr = list(range(num_labels['row']))
    col_arr = list(range(num_labels['col']))

    # getvalue() ignores the cursor position but read_table does not; rewind
    # so the parse covers the same text that was inspected above.
    file_buffer.seek(0)

    # use a multi-row header only if there are col categories
    read_kwargs = {'index_col': row_arr}
    if len(col_arr) > 1:
        read_kwargs['header'] = col_arr

    tmp_df = {'mat': pd.read_table(file_buffer, **read_kwargs)}
    mat = tmp_df['mat']

    # Reassign an axis only when its labels were actually converted.
    row_names = convert_labels(mat.index.tolist())
    if row_names is not None:
        mat.index = row_names

    col_names = convert_labels(mat.columns.tolist())
    if col_names is not None:
        mat.columns = col_names

    # NOTE: all-NaN columns (e.g. produced by trailing tabs) are not removed
    # in this version of the loader.
    net.df_to_dat(tmp_df)
    net.dat['filename'] = filename