import pandas as pd

from catalog_preparation.catalog_creation_helper import remove_literally, all_cases_of_cases

# Source listing of foods and the destination of the tidy catalog.
food_file_path = '../data/food/food.tsv'
output_csv_path = '../data/food/food_usda_tidy.csv'

if __name__ == '__main__':
    # Load the ';'-separated, cp1252-encoded listing into two named columns.
    # NOTE(review): header=1 together with explicit names makes pandas use
    # the second line as the header row (replaced by `names`) and skip the
    # line before it - confirm the file really starts with two non-data lines.
    raw_foods = pd.read_table(
        food_file_path,
        sep=';',
        encoding='cp1252',
        header=1,
        names=['group', 'name'],
    )

    # Presumably filters out entries for the literal term 'pie'; the exact
    # semantics live in catalog_creation_helper.remove_literally.
    filtered_foods = remove_literally(raw_foods, ['pie'])

    # Presumably produces letter-case variants of the entries - verify
    # against catalog_creation_helper.all_cases_of_cases.
    case_variants = all_cases_of_cases(filtered_foods)

    # Outer merge keyed on every column keeps the rows of both frames.
    key_columns = list(filtered_foods)
    combined_foods = pd.merge(
        filtered_foods,
        case_variants,
        on=key_columns,
        how='outer',
    )

    # Drop repeated rows and write the catalog without the index column.
    combined_foods.drop_duplicates().to_csv(output_csv_path, index=False)
            synonyms = []
        try:
            groups = [group.value for group in stanza.tags['is_a']]
        except:
            groups = ['NA']

        names = [name] + synonyms

        # apostrophe1_names = [name.replace('\'', '’') for name in names if '\'' in name]
        # apostrophe2_names = [name.replace('’', '\'') for name in names if '’' in name]
        names = [re.sub('[’\']', '', name) for name in names]

        for disease_name in names:
            for group in groups:
                entry = {'id': disease_id,
                         'name': disease_name,
                         'group': group,
                         'obsolete': is_obsolete}
                disease_data = disease_data.append(entry, ignore_index=True)
    return disease_data

# Ontology file to parse and the destination of the disease catalog.
obo_path = "../data/diseases/doid.obo"
output_path = '../data/diseases/diseases_catalog.csv'

if __name__ == '__main__':
    # Build the raw disease table from the OBO ontology file.
    parsed_diseases = parse_obo(obo_path)

    # Presumably filters out entries for the literal term 'disease'; the
    # exact semantics live in catalog_creation_helper.remove_literally.
    filtered_diseases = remove_literally(parsed_diseases, ['disease'])

    # Presumably produces letter-case variants of the entries - verify
    # against catalog_creation_helper.all_cases_of_cases.
    case_variants = all_cases_of_cases(filtered_diseases)

    # Outer merge keyed on every column keeps the rows of both frames.
    key_columns = list(filtered_diseases)
    combined_diseases = pd.merge(
        filtered_diseases,
        case_variants,
        on=key_columns,
        how='outer',
    )

    # Drop repeated rows and write the catalog without the index column.
    # The output is tab-separated even though the path ends in '.csv'.
    combined_diseases.drop_duplicates().to_csv(
        output_path,
        index=False,
        sep='\t',
    )