Python check_categories Examples, categories.check_categories Python Examples

Example #1

0

Show file

File: load_data.py Project: jjdblast/clustergrammer.js

def load_tsv_to_net(net, file_buffer):
  """Parse a tab-separated matrix from ``file_buffer`` and load it into ``net``.

  ``file_buffer`` must expose ``getvalue()`` (an in-memory text buffer) and
  be readable by ``pandas.read_table``. The text is split into lines and
  handed to ``categories.check_categories``, whose result supplies
  ``['row']`` and ``['col']`` counts. Those counts select the leading
  columns used as the index and, when there is more than one, the leading
  rows used as a multi-level header. The resulting frame is stored under
  the ``'mat'`` key and passed to ``net.df_to_dat``.

  NOTE(review): ``getvalue()`` does not move the stream position, so
  ``read_table`` reads from wherever the caller left the buffer --
  presumably the start; confirm against callers.
  """
  import pandas as pd
  import categories

  lines = file_buffer.getvalue().split('\n')
  num_labels = categories.check_categories(lines)

  # Materialize as lists: on Python 3 ``range`` is a lazy object, while
  # pandas documents index_col/header as an int or a list of ints.
  row_arr = list(range(num_labels['row']))
  col_arr = list(range(num_labels['col']))
  tmp_df = {}

  # use header if there are col categories
  if len(col_arr) > 1:
    tmp_df['mat'] = pd.read_table(file_buffer, index_col=row_arr,
                                  header=col_arr)
  else:
    tmp_df['mat'] = pd.read_table(file_buffer, index_col=row_arr)

  # remove columns with all nans, occurs when there are trailing
  # tabs on rows; how='all' keeps columns that merely contain some
  # missing values (the default how='any' would drop those too)
  tmp_df['mat'] = tmp_df['mat'].dropna(axis=1, how='all')

  net.df_to_dat(tmp_df)

Example #2

0

Show file

File: load_data.py Project: MaayanLab/IDG_poster_2016

def load_tsv_to_net(net, file_buffer, filename=None):
  """Parse a tab-separated matrix from ``file_buffer`` and load it into ``net``.

  ``file_buffer`` must expose ``getvalue()`` (an in-memory text buffer) and
  be readable by ``pandas.read_table``. The text is split into lines and
  handed to ``categories.check_categories``, whose result supplies
  ``['row']`` and ``['col']`` counts. Those counts select the leading
  columns used as the index and, when there is more than one, the leading
  rows used as a multi-level header.

  The frame is stored under the ``'mat'`` key, passed through
  ``proc_df_labels.main`` (defined elsewhere; presumably normalizes the
  row/column labels -- confirm), and handed to ``net.df_to_dat``. Finally
  ``filename`` is recorded in ``net.dat['filename']``.

  NOTE(review): ``getvalue()`` does not move the stream position, so
  ``read_table`` reads from wherever the caller left the buffer --
  presumably the start; confirm against callers.
  """
  import pandas as pd
  import categories
  import proc_df_labels

  lines = file_buffer.getvalue().split('\n')
  num_labels = categories.check_categories(lines)

  # Materialize as lists: on Python 3 ``range`` is a lazy object, while
  # pandas documents index_col/header as an int or a list of ints.
  row_arr = list(range(num_labels['row']))
  col_arr = list(range(num_labels['col']))
  tmp_df = {}

  # use header if there are col categories
  if len(col_arr) > 1:
    tmp_df['mat'] = pd.read_table(file_buffer, index_col=row_arr,
                                  header=col_arr)
  else:
    tmp_df['mat'] = pd.read_table(file_buffer, index_col=row_arr)

  tmp_df = proc_df_labels.main(tmp_df)

  net.df_to_dat(tmp_df)
  net.dat['filename'] = filename

Example #3

0

Show file

def load_tsv_to_net(net, file_buffer, filename=None):
    """Parse a tab-separated matrix from ``file_buffer`` and load it into ``net``.

    ``file_buffer`` must expose ``getvalue()`` (an in-memory text buffer)
    and be readable by ``pandas.read_table``. The text is split into lines
    and handed to ``categories.check_categories``, whose result supplies
    ``['row']`` and ``['col']`` counts. Those counts select the leading
    columns used as the index and, when there is more than one, the leading
    rows used as a multi-level header.

    The frame is stored under the ``'mat'`` key, passed through
    ``proc_df_labels.main`` (defined elsewhere; presumably normalizes the
    row/column labels -- confirm), and handed to ``net.df_to_dat``.
    Finally ``filename`` is recorded in ``net.dat['filename']``.

    NOTE(review): ``getvalue()`` does not move the stream position, so
    ``read_table`` reads from wherever the caller left the buffer --
    presumably the start; confirm against callers.
    """
    import pandas as pd
    import categories
    import proc_df_labels

    lines = file_buffer.getvalue().split('\n')
    num_labels = categories.check_categories(lines)

    # Materialize as lists: on Python 3 ``range`` is a lazy object, while
    # pandas documents index_col/header as an int or a list of ints.
    row_arr = list(range(num_labels['row']))
    col_arr = list(range(num_labels['col']))
    tmp_df = {}

    # use header if there are col categories
    if len(col_arr) > 1:
        tmp_df['mat'] = pd.read_table(file_buffer,
                                      index_col=row_arr,
                                      header=col_arr)
    else:
        tmp_df['mat'] = pd.read_table(file_buffer, index_col=row_arr)

    tmp_df = proc_df_labels.main(tmp_df)

    net.df_to_dat(tmp_df)
    net.dat['filename'] = filename

Example #4

0

Show file

File: load_data.py Project: ErwanDavid/clustergrammer.js

def load_tsv_to_net(net, file_buffer, filename=None):
  """Parse a tab-separated matrix from ``file_buffer`` and load it into ``net``.

  ``file_buffer`` must expose ``getvalue()`` (an in-memory text buffer) and
  be readable by ``pandas.read_table``. The text is split into lines and
  handed to ``categories.check_categories``, whose result supplies
  ``['row']`` and ``['col']`` counts. Those counts select the leading
  columns used as the index and, when there is more than one, the leading
  rows used as a multi-level header.

  After parsing, the FIRST label of each axis is inspected to decide how
  the whole axis is normalized:

  * a numeric first label  -> every label on that axis is converted with
    ``str``;
  * a string first label shaped like ``"(..., ...)"`` -> every label on
    that axis is parsed with ``ast.literal_eval``;
  * a first label that is already a tuple, or anything else -> the axis
    is left untouched.

  The frame (under the ``'mat'`` key) is handed to ``net.df_to_dat`` and
  ``filename`` is recorded in ``net.dat['filename']``.

  Raises whatever ``ast.literal_eval`` raises (``ValueError`` /
  ``SyntaxError``) if the first label looks like a tuple but a later label
  on the same axis is not a valid Python literal.

  NOTE(review): ``getvalue()`` does not move the stream position, so
  ``read_table`` reads from wherever the caller left the buffer --
  presumably the start; confirm against callers.
  """
  import numpy as np
  import pandas as pd
  import categories
  from ast import literal_eval as make_tuple

  lines = file_buffer.getvalue().split('\n')
  num_labels = categories.check_categories(lines)

  # Materialize as lists: on Python 3 ``range`` is a lazy object, while
  # pandas documents index_col/header as an int or a list of ints.
  row_arr = list(range(num_labels['row']))
  col_arr = list(range(num_labels['col']))
  tmp_df = {}

  # use header if there are col categories
  if len(col_arr) > 1:
    tmp_df['mat'] = pd.read_table(file_buffer, index_col=row_arr,
                                  header=col_arr)
  else:
    tmp_df['mat'] = pd.read_table(file_buffer, index_col=row_arr)

  # 1) check that rows are strings (in case of numerical names)
  # 2) check for tuples, and in that case load tuples to categories
  labels = {
    'row': tmp_df['mat'].index.tolist(),
    'col': tmp_df['mat'].columns.tolist(),
  }

  # isinstance against the abstract NumPy scalar types also catches
  # widths other than int64 (and NumPy floats), not just the exact types
  number_types = (int, float, np.integer, np.floating)

  found_tuple = {}
  found_number = {}
  for inst_rc in ['row', 'col']:

    found_tuple[inst_rc] = False
    found_number[inst_rc] = False

    # an empty axis has no first label to inspect
    if not labels[inst_rc]:
      continue

    inst_name = labels[inst_rc][0]

    # already a tuple: nothing to convert
    if isinstance(inst_name, tuple):
      continue

    if isinstance(inst_name, number_types):
      found_number[inst_rc] = True

    elif hasattr(inst_name, 'startswith'):
      # string label: "(a, b)" style text marks a serialized tuple;
      # startswith/endswith are safe on an empty string, unlike [0]/[-1]
      if inst_name.startswith('(') and inst_name.endswith(')') \
        and inst_name.find(',') > 0:
        found_tuple[inst_rc] = True

  # convert to tuple if necessary
  #################################################
  if found_tuple['row']:
    tmp_df['mat'].index = [make_tuple(x) for x in labels['row']]

  if found_tuple['col']:
    tmp_df['mat'].columns = [make_tuple(x) for x in labels['col']]

  # convert numbers to string if necessary
  #################################################
  if found_number['row']:
    tmp_df['mat'].index = [str(x) for x in labels['row']]

  if found_number['col']:
    tmp_df['mat'].columns = [str(x) for x in labels['col']]

  net.df_to_dat(tmp_df)
  net.dat['filename'] = filename

Example #5

0

Show file

def load_tsv_to_net(net, file_buffer, filename=None):
    """Parse a tab-separated matrix from ``file_buffer`` and load it into ``net``.

    ``file_buffer`` must expose ``getvalue()`` (an in-memory text buffer)
    and be readable by ``pandas.read_table``. The text is split into lines
    and handed to ``categories.check_categories``, whose result supplies
    ``['row']`` and ``['col']`` counts. Those counts select the leading
    columns used as the index and, when there is more than one, the leading
    rows used as a multi-level header.

    After parsing, the FIRST label of each axis is inspected to decide how
    the whole axis is normalized:

    * a numeric first label  -> every label on that axis is converted
      with ``str``;
    * a string first label shaped like ``"(..., ...)"`` -> every label on
      that axis is parsed with ``ast.literal_eval``;
    * a first label that is already a tuple, or anything else -> the axis
      is left untouched.

    The frame (under the ``'mat'`` key) is handed to ``net.df_to_dat`` and
    ``filename`` is recorded in ``net.dat['filename']``.

    Raises whatever ``ast.literal_eval`` raises (``ValueError`` /
    ``SyntaxError``) if the first label looks like a tuple but a later
    label on the same axis is not a valid Python literal.

    NOTE(review): ``getvalue()`` does not move the stream position, so
    ``read_table`` reads from wherever the caller left the buffer --
    presumably the start; confirm against callers.
    """
    import numpy as np
    import pandas as pd
    import categories
    from ast import literal_eval as make_tuple

    lines = file_buffer.getvalue().split('\n')
    num_labels = categories.check_categories(lines)

    # Materialize as lists: on Python 3 ``range`` is a lazy object, while
    # pandas documents index_col/header as an int or a list of ints.
    row_arr = list(range(num_labels['row']))
    col_arr = list(range(num_labels['col']))
    tmp_df = {}

    # use header if there are col categories
    if len(col_arr) > 1:
        tmp_df['mat'] = pd.read_table(file_buffer,
                                      index_col=row_arr,
                                      header=col_arr)
    else:
        tmp_df['mat'] = pd.read_table(file_buffer, index_col=row_arr)

    # 1) check that rows are strings (in case of numerical names)
    # 2) check for tuples, and in that case load tuples to categories
    labels = {
        'row': tmp_df['mat'].index.tolist(),
        'col': tmp_df['mat'].columns.tolist(),
    }

    # isinstance against the abstract NumPy scalar types also catches
    # widths other than int64 (and NumPy floats), not just the exact types
    number_types = (int, float, np.integer, np.floating)

    found_tuple = {}
    found_number = {}
    for inst_rc in ['row', 'col']:

        found_tuple[inst_rc] = False
        found_number[inst_rc] = False

        # an empty axis has no first label to inspect
        if not labels[inst_rc]:
            continue

        inst_name = labels[inst_rc][0]

        # already a tuple: nothing to convert
        if isinstance(inst_name, tuple):
            continue

        if isinstance(inst_name, number_types):
            found_number[inst_rc] = True

        elif hasattr(inst_name, 'startswith'):
            # string label: "(a, b)" style text marks a serialized tuple;
            # startswith/endswith are safe on an empty string, unlike
            # [0]/[-1]
            if inst_name.startswith('(') and inst_name.endswith(')') \
                    and inst_name.find(',') > 0:
                found_tuple[inst_rc] = True

    # convert to tuple if necessary
    #################################################
    if found_tuple['row']:
        tmp_df['mat'].index = [make_tuple(x) for x in labels['row']]

    if found_tuple['col']:
        tmp_df['mat'].columns = [make_tuple(x) for x in labels['col']]

    # convert numbers to string if necessary
    #################################################
    if found_number['row']:
        tmp_df['mat'].index = [str(x) for x in labels['row']]

    if found_number['col']:
        tmp_df['mat'].columns = [str(x) for x in labels['col']]

    net.df_to_dat(tmp_df)
    net.dat['filename'] = filename