コード例 #1
0
# Maximum number of candidates fetched from the database and embedded per
# batch.
limit = 1000000
total_candidate_count = total_candidates_df.shape[0]

# Loop-invariant values: build the id list and the padding length once
# instead of recomputing them for every batch.
candidate_ids = total_candidates_df.candidate_id.astype(int).tolist()
max_length = total_candidates_df.sen_length.max()

for offset in range(0, total_candidate_count, limit):
    # OFFSET/LIMIT paging is only well defined over an ordered result set.
    # Without ORDER BY the database is free to return rows in a different
    # order for each query, so batches could overlap or skip candidates.
    candidates = (
        session.query(GeneGene)
        .filter(GeneGene.id.in_(candidate_ids))
        .order_by(GeneGene.id)
        .offset(offset)
        .limit(limit)
        .all()
    )

    # The first batch creates (or overwrites) the file and writes the header
    # row; every later batch appends its rows without repeating the header.
    first_batch = offset == 0
    generate_embedded_df(candidates, word_dict, max_length=max_length).to_csv(
        "results/all_embedded_gg_sentences.tsv",
        index=False,
        sep="\t",
        mode="w" if first_batch else "a",
        header=first_batch,
    )

# In[ ]:
コード例 #2
0
         )
     )
     .offset(offset)
     .limit(limit)
     .all()
 )
 
 max_length = total_candidates_df.sen_length.max()
 
 # if first iteration create the file
 if offset == 0:
     (
         generate_embedded_df(candidates, fixed_word_dict, max_length=max_length)
         .to_sparse()
         .to_csv(
             "output/all_embedded_cd_sentences.tsv",
             index=False, 
             sep="\t", 
             mode="w"
         )
     )
     
 # else append don't overwrite
 else:
     (
         generate_embedded_df(candidates, fixed_word_dict, max_length=max_length)
         .to_sparse()
         .to_csv(
             "output/all_embedded_cd_sentences.tsv",
             index=False, 
             sep="\t", 
             mode="a",