def test_query_df_schema(): agent = Agent(df, schema) q = 'how many people died of stomach cancer in 2011' sql = 'SELECT SUM(death_count) FROM cancer_death WHERE cancer_site = "Stomach" AND year = "2011" ' res = agent.get_query(q) if res.strip() != sql.strip(): raise AssertionError
def test_query_df(): agent = Agent(df) q = 'Get me the average age of stomach cancer deaths' sql = 'SELECT AVG(death_count) FROM dataframe WHERE cancer_site = "Stomach" ' res = agent.get_query(q) if res.strip() != sql.strip(): raise AssertionError
def test_query_df_schema(): agent = Agent(df, schema) q = 'how many people died of stomach cancer in 2011' sql = "SELECT SUM(death_count) FROM cancer_death WHERE cancer_site = 'Stomach' AND year = '2011' " res = agent.get_query(q) if res.strip() != sql.strip(): print("question", q) print("predicted", res) print("actual", sql) raise AssertionError
def test_query_df(): agent = Agent(df) q = 'Get me the average age of stomach cancer in male' sql = "SELECT AVG(age) FROM dataframe WHERE gender = 'Male' AND cancer_site = 'Stomach' " res = agent.get_query(q) if res.strip() != sql.strip(): print("question", q) print("predicted", res) print("actual", sql) raise AssertionError
def test_query_df(): agent = Agent(df) q = 'Get me the average age of stomach cancer deaths' sql = 'SELECT AVG(death_count) FROM dataframe WHERE cancer_site = "Stomach" ' res = agent.get_query(q) if res.strip() != sql.strip(): print("question", q) print("predicted", res) print("actual", sql) raise AssertionError
def test_query(): agent=Agent(os.path.join(currpath,"cleaned_data")) qmaps={'how many nuclear medicine activities in 2012':'SELECT COUNT(activity_type) FROM activities_data WHERE activity_type_chapter = "Nuclear Medicine" AND year = "2012" ', 'find me the diseases having above 3000 cases':'SELECT disease FROM communicable_diseases_data WHERE cases > 3000 ', 'which are the activities in 2011':'SELECT activity_type FROM activities_data WHERE year = "2011" ', 'find the maximum number of cases':'SELECT MAX(cases) FROM communicable_diseases_data', 'Get me the average age of stomach cancer deaths':'SELECT AVG(death_count) FROM cancer_death_data WHERE cancer_site = "Stomach" ' } for q,sql in qmaps.items(): res=agent.get_query(q) assert res.strip() ==sql.strip()
def test_query_schema(): agent=Agent(os.path.join(currpath,"cleaned_data"),os.path.join(currpath,"schema")) qmaps={'how many people died of stomach cancer in 2011':'SELECT SUM(death_count) FROM cancer_death WHERE cancer_site = "Stomach" AND year = "2011" ', 'how many deaths of age below 40 had stomach cancer':'SELECT SUM(death_count) FROM cancer_death WHERE cancer_site = "Stomach" AND age < 40 ', 'which are the activities in 2011':'SELECT activity_type_chapter FROM activities WHERE year = "2011" ', 'find the maximum number of cases':'SELECT MAX(cases) FROM communicable_diseases ', 'Get me the average age of stomach cancer deaths':'SELECT AVG(death_count) FROM cancer_death WHERE cancer_site = "Stomach" ' } for q,sql in qmaps.items(): res=agent.get_query(q) assert res.strip() ==sql.strip()
def simple_upload(request): try: if request.method == 'POST' and request.FILES['myfile']: myfile = request.FILES['myfile'] query = request.POST['query'] df = pd.read_csv(myfile) agent = Agent(df) database_result = agent.query_db(query) database_query = agent.get_query(question=query) return render(request, 'agent/simple_upload.html', { 'database_result': database_result, 'database_query': database_query }) return render(request, 'agent/simple_upload.html') except MultiValueDictKeyError: database_result = "Please Upload a CSV" return render(request, 'agent/simple_upload.html',{ 'database_result':database_result })
def get_results(self, text, csv_dir=None, schema_dir=None): """ Parameters ---------- text : `str` or `pathlib.Path` object. Input text to be processed or path to file(.txt/.docx) containing textual information. csv_path : `str` or `pathlib.Path` object, absolute path to folder containing all input files. Optional schema_path: `str` or `pathlib.Path` object, path to folder containing `json` schemas of input files. If not specified, auto-generated schema will be used.Optional Returns ------- valmaps : `dict` Maps questions with generated answers. """ valmaps = {} if os.path.isfile(text): text = self.__text_process(text) if csv_dir: questions = self.get_questions(text, csv=True) agent = Agent(csv_dir, schema_dir) for q in questions: try: res = agent.query_db(q) valmaps[q] = res except: pass else: questions = self.get_questions(text) for q in questions: valmaps[q] = qa(text, q) return valmaps
def test_query(): agent = Agent(os.path.join(currpath, "cleaned_data")) qmaps = { 'how many nuclear medicine activities in 2012': "SELECT COUNT(activity_type) FROM activities_data WHERE activity_type_chapter = 'Nuclear Medicine' AND year = '2012' ", 'find me the diseases having above 3000 cases': 'SELECT disease FROM communicable_diseases_data WHERE cases > 3000 ', 'which are the activities in 2011': "SELECT activity_type,activity_type_chapter FROM activities_data WHERE year = '2011' ", 'find the maximum number of cases': "SELECT MAX(cases) FROM communicable_diseases_data", 'Get me the average of stomach cancer deaths': "SELECT AVG(death_count) FROM cancer_death_data WHERE cancer_site = 'Stomach' ", 'how many activities between 2011 and 2014': "SELECT COUNT(activity_type) FROM activities_data WHERE year BETWEEN 2011 AND 2014" } for q, sql in qmaps.items(): res = agent.get_query(q) if res.strip() != sql.strip(): print("question", q) print("predicted", res) print("actual", sql) raise AssertionError
def test_query_schema(): agent = Agent(os.path.join(currpath, "cleaned_data"), os.path.join(currpath, "schema")) qmaps = { 'how many people died of stomach cancer in 2011': "SELECT SUM(death_count) FROM cancer_death WHERE cancer_site = 'Stomach' AND year = '2011' ", 'how many deaths of age below 40 had stomach cancer': "SELECT SUM(death_count) FROM cancer_death WHERE cancer_site = 'Stomach' AND age < 40 ", 'which are the activities in 2011': "SELECT activity_type_chapter,activity_type FROM activities WHERE year = '2011' ", 'find the maximum number of cases': "SELECT MAX(cases) FROM communicable_diseases ", 'Get me the average of stomach cancer deaths': "SELECT AVG(death_count) FROM cancer_death WHERE cancer_site = 'Stomach' ", 'how many people between age 30 and 40 died of stomach cancer': "SELECT SUM(death_count) FROM cancer_death WHERE cancer_site = 'Stomach' AND age BETWEEN 30 AND 40 " } for q, sql in qmaps.items(): res = agent.get_query(q) if res.strip() != sql.strip(): print("question", q) print("predicted", res) print("actual", sql) raise AssertionError