def execute_pipeline(query, n_predictions=None):
    """Answer ``query`` with a cdQA pipeline fitted on the BNPP newsroom CSV.

    Every call invokes ``download_bnpp_data`` and ``download_model``, loads
    and filters the CSV, builds a ``QAPipeline``, fits its retriever and,
    when CUDA is available, moves the pipeline to the GPU before predicting.

    Args:
        query: Question handed to ``QAPipeline.predict``.
        n_predictions: When not ``None``, forwarded to ``predict`` as the
            ``n_predictions`` keyword.

    Returns:
        When ``n_predictions`` is ``None``: a tuple made of the first two
        items of the prediction. Otherwise: a list of ``(answer, title)``
        tuples, one per prediction returned by the pipeline.
    """
    download_bnpp_data("./data/bnpp_newsroom_v1.1/")
    download_model("bert-squad_1.1", dir="./models")

    # The 'paragraphs' column is parsed with literal_eval while reading.
    corpus = filter_paragraphs(
        pd.read_csv(
            "./data/bnpp_newsroom_v1.1/bnpp_newsroom-v1.1.csv",
            converters={"paragraphs": literal_eval},
        )
    )

    pipeline = QAPipeline(reader="models/bert_qa_vCPU-sklearn.joblib")
    pipeline.fit_retriever(corpus)
    if torch.cuda.is_available():
        pipeline.cuda()

    # Single-prediction case: keep only the first two fields of the result.
    if n_predictions is None:
        best = pipeline.predict(query)
        return (best[0], best[1])

    # Multi-prediction case: each item is unpacked as
    # (answer, title, paragraph, score); only answer and title are kept.
    ranked = pipeline.predict(query, n_predictions=n_predictions)
    return [(answer, title) for answer, title, _paragraph, _score in ranked]
from cdqa.pipeline import QAPipeline
from cdqa.utils.evaluation import f1_score, exact_match_score

# Dataset: load the augmented CSV, parsing the 'paragraphs' column with
# literal_eval while reading.
df = pd.read_csv(
    "./data/data_augmentation.csv",
    encoding="utf-8",
    converters={"paragraphs": literal_eval},
)
# Redefine 'paragraphs' as Title + Paragraph: the original lists are kept in
# 'paragraphs_old' and each row's title is prepended to its paragraph list.
df["paragraphs_old"] = df["paragraphs"]
df["paragraphs"] = df.apply(
    lambda record: [record["title"]] + record["paragraphs_old"],
    axis=1,
).copy()

# Second read of the same CSV without converters; a fixed 100-row sample is
# drawn from it (random_state=66 keeps the sample reproducible).
data = pd.read_csv("./data/data_augmentation.csv", encoding="utf-8")
data_sampling = data.sample(n=100, random_state=66)

from cdqa.retriever import TfidfRetriever, BM25Retriever
# Build the QA pipeline from the saved reader with the 'bm25' retriever and
# retrieve_by_doc=True, then fit its retriever on the prepared dataframe.
cdqa_pipeline = QAPipeline(
    reader='bert_qa_multi_epoch3.joblib',
    retrieve_by_doc=True,
    retriever='bm25',
)
cdqa_pipeline.fit_retriever(df=df)

# Move the pipeline to the GPU only when CUDA is actually available, matching
# the guard used in execute_pipeline; an unconditional .cuda() call would
# abort the script on CPU-only machines.
import torch  # imported here so this block does not rely on an earlier import

if torch.cuda.is_available():
    cdqa_pipeline.cuda()

# Separate BM25 retriever fitted on the same dataframe; f1 below calls its
# predict() directly.
retriever = BM25Retriever(
    ngram_range=(1, 2),
    max_df=0.8,
    min_df=3,
    stop_words=None,
    lowercase=True,
    top_n=5,
)
retriever.fit(df=df)
def f1(dataframe,dataframe2):
    number = 0
    exact_number = 0
    # score = []
    answer_list=[] 
    while number < 100:
        # print("Question?")
        question = dataframe2.iloc[number,2] # 질문
        # question = input()
        best_idx_scores = retriever.predict(question)
        prediction = df.loc[best_idx_scores.keys()]['paragraphs'].apply(lambda x:x[1]).tolist()[0].replace(u'\xa0',u'')
        number+=1
        answer_list.append(prediction)