# Embed all gene-gene (GG) candidate sentences and stream them to a TSV in
# batches, so the full embedded matrix never has to fit in memory at once.
limit = 1000000
total_candidate_count = total_candidates_df.shape[0]

# Hoist loop-invariant work out of the batch loop:
#  - max_length: padding length for every embedded sentence (same for all batches)
#  - candidate_ids: the IN(...) filter list; converting the whole column per
#    iteration was the dominant redundant cost in the original loop body.
max_length = total_candidates_df.sen_length.max()
candidate_ids = total_candidates_df.candidate_id.astype(int).tolist()

for offset in range(0, total_candidate_count, limit):
    candidates = (
        session.query(GeneGene)
        .filter(GeneGene.id.in_(candidate_ids))
        .offset(offset)
        .limit(limit)
        .all()
    )

    # First batch creates the file (mode="w", header on); every later batch
    # appends without repeating the header.  One call replaces the duplicated
    # if/else to_csv blocks of the original.
    first_batch = offset == 0
    generate_embedded_df(candidates, word_dict, max_length=max_length).to_csv(
        "results/all_embedded_gg_sentences.tsv",
        index=False,
        sep="\t",
        mode="w" if first_batch else "a",
        header=first_batch,
    )


# In[ ]:
) ) .offset(offset) .limit(limit) .all() ) max_length = total_candidates_df.sen_length.max() # if first iteration create the file if offset == 0: ( generate_embedded_df(candidates, fixed_word_dict, max_length=max_length) .to_sparse() .to_csv( "output/all_embedded_cd_sentences.tsv", index=False, sep="\t", mode="w" ) ) # else append don't overwrite else: ( generate_embedded_df(candidates, fixed_word_dict, max_length=max_length) .to_sparse() .to_csv( "output/all_embedded_cd_sentences.tsv", index=False, sep="\t", mode="a",